Files @ a522e81d4f37
Branch filter:

Location: Regular-Expresso/regexp.py - annotation

Laman
implemented the plus operator
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
a522e81d4f37
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
a522e81d4f37
a522e81d4f37
a522e81d4f37
a522e81d4f37
a522e81d4f37
a522e81d4f37
a522e81d4f37
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
a522e81d4f37
a522e81d4f37
a522e81d4f37
a522e81d4f37
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
c316bec3c0cd
171b6c89df1d
c316bec3c0cd
a522e81d4f37
171b6c89df1d
a522e81d4f37
c316bec3c0cd
a522e81d4f37
171b6c89df1d
from abc import abstractmethod


class Token:
	is_skippable = False

	@abstractmethod
	def list_first(self):
		pass

	@abstractmethod
	def list_last(self):
		pass

	@abstractmethod
	def list_neighbours(self):
		pass


class Symbol(Token):
	def __init__(self, position, value):
		self.position = position
		self.value = value

	def list_first(self):
		yield self.position

	def list_last(self):
		yield self.position

	def list_neighbours(self):
		yield from []

	def __str__(self):
		return self.value


class Plus(Token):
	def __init__(self, content: Token):
		self.content = content

	def list_first(self):
		yield from self.content.list_first()

	def list_last(self):
		yield from self.content.list_last()

	def list_neighbours(self):
		yield from self.content.list_neighbours()
		for x in self.list_last():
			for y in self.list_first():
				yield (x, y)

	def __str__(self):
		return str(self.content) + "+"


class Asterisk(Plus):
	is_skippable = True
	
	def __str__(self):
		return str(self.content) + "*"


class Chain(Token):
	def __init__(self, content: list):
		self.content = content

	def list_first(self):
		for token in self.content:
			yield from token.list_first()
			if not token.is_skippable:
				break

	def list_last(self):
		for token in reversed(self.content):
			yield from token.list_last()
			if not token.is_skippable:
				break

	def list_neighbours(self):
		previous = []
		for token in self.content:
			for t in previous:
				for x in t.list_last():
					for y in token.list_first():
						yield (x, y)
			yield from token.list_neighbours()

			if token.is_skippable:
				previous.append(token)
			else:
				previous = [token]

	def __str__(self):
		return "(" + "".join(str(x) for x in self.content) + ")"


def find_closing_parenthesis(pattern, k):
	counter = 0

	for (i, c) in enumerate(pattern[k:]):
		if c == "(":
			counter += 1
		elif c == ")":
			counter -= 1
		if counter == 0:
			return k+i

	return None


def parse(pattern, offset=0):
	res = []

	i = 0
	while i < len(pattern):
		c = pattern[i]
		if c == "(":
			j = find_closing_parenthesis(pattern, i)
			inner_content = parse(pattern[i+1:j], offset+i+1)
			res.append(inner_content)
			i = j+1
		elif c == "*":
			token = res.pop()
			res.append(Asterisk(token))
			i += 1
		elif c == "+":
			token = res.pop()
			res.append(Plus(token))
			i += 1
		else:
			res.append(Symbol(i+offset, c))
			i += 1

	return Chain(res)


class Regexp:
	def __init__(self, s):
		(self.rules, self.end_states) = self._parse(s)

	def _parse(self, s):
		r = parse(s)
		rules = dict()

		for i in r.list_first():
			c = s[i]
			key = (-1, c)
			if key not in rules:
				rules[key] = set()
			rules[key].add(i)

		for (i, j) in r.list_neighbours():
			c = s[j]
			key = (i, c)
			if key not in rules:
				rules[key] = set()
			rules[key].add(j)

		end_states = set(r.list_last())

		return rules, end_states

	def match(self, s):
		current = {-1}

		for c in s:
			new_state = set()
			for st in current:
				key = (st, c)
				if key in self.rules:
					new_state.update(self.rules[key])
			current = new_state

		return any(st in self.end_states for st in current)


if __name__ == "__main__":
	tests = ["a", "ab", "aabb", "abab", "abcd", "abcbcdbcd"]
	for pattern in ["a*b*", "a+b+", "(ab)*", "(ab)+", "a((bc)*d)*"]:
		print("#", pattern)
		r = Regexp(pattern)
		for t in tests:
			print(t, r.match(t))
		print()