Changeset - 0ee71f3564f4
[Not reviewed]
default
0 5 0
Laman - 8 years ago 2017-01-28 07:34:32

the parser is more predictive and reports errors
5 files changed with 119 insertions and 74 deletions:
0 comments (0 inline, 0 general)
src/sgfParser/__init__.py
Show inline comments
 
## Return the index of the first non-whitespace character of s at or after start.
#
# Gives len(s) when only whitespace remains. A start at or past the end of s is returned unchanged.
def skipWhitespace(s,start):
	pos=start
	while pos<len(s):
		if not s[pos].isspace():
			break
		pos+=1
	return pos
 

	
 

	
 
## Return the 1-based number of the line of s containing the character at index i.
#
# An index at or past the end of s is reported on the last line, an empty s yields 1.
#
# @param s the text, lines are recognized the same way as by str.splitlines
# @param i 0-based index into s
def lineNumber(s,i):
	k=0 # total length of the lines walked so far
	r=0
	for (r,line) in enumerate(s.splitlines(True)):
		k+=len(line)
		# strictly greater: the index k already belongs to the first character of the next line
		if k>i: break
	return r+1
 

	
 

	
 
## Error raised when the parsed text does not have the expected form.
class ParserError(Exception):
	## Report a parsing failure.
	#
	# The message and the unparsed rest of the text are printed to the standard output.
	#
	# @param message description of what the parser expected
	# @param s the whole text being parsed
	# @param i index into s where the problem was found
	def __init__(self,message,s,i):
		# hand only the message over to Exception, otherwise str(error) would dump the whole s
		super().__init__(message)
		# !! report the position as a line and a column
		print(message)
		# slicing is safe even for i>=len(s), it yields an empty string then
		print(s[i:])
		self.message=message
 

	
 

	
 
class ParserWarning(ParserError):
	"""ParserError subclass for problems reported as warnings; it adds nothing to its parent."""
src/sgfParser/collection.py
Show inline comments
 
from sgfParser.node import Node
 
from . import skipWhitespace
 
from . import skipWhitespace, ParserError
 
 
 
## Top level of the parsed text, a sequence of GameTrees.
class Collection:
	## Parse the whole text s.
	#
	# Leading whitespace is skipped, then GameTrees are read for as long as another one starts with "(".
	# An empty or whitespace-only s gives a Collection without any GameTree.
	#
	# @param s the text to parse
	# @throws ParserError if s doesn't start with a GameTree or if anything is left after the last one
	def __init__(self,s):
		self.gameTrees=[]
		i=skipWhitespace(s,0)
		if i>=len(s): return
		elif not GameTree.fits(s,i):
			raise ParserError("expected a GameTree starting with '('",s,i)
		while GameTree.fits(s,i):
			# create() returns the index already moved past the whitespace following the tree
			i,x=GameTree.create(s,i)
			self.gameTrees.append(x)
		if i<len(s):
			raise ParserError("expected EOF",s,i)

	## Yield every game listed by every GameTree of the collection.
	def listGames(self):
		for tree in self.gameTrees:
			yield from tree.listGames()
 
 
 
## Sequence of Nodes optionally followed by branches, each of them a GameTree itself.
class GameTree:
	def __init__(self):
		self.nodes=[] # the main sequence in the order parsed
		self.branches=[] # GameTrees following the main sequence

	## Tell whether a GameTree can start at s[i], i.e. s[i] is "(".
	@staticmethod
	def fits(s,i):
		return i<len(s) and s[i]=="("

	## Parse a GameTree starting at s[start].
	#
	# Nodes of the main sequence are linked as parent and child. The first Node of each branch
	# becomes a child of the last Node of the main sequence.
	#
	# @param s the text being parsed
	# @param start index of the opening "(", asserted
	# @return (index of the first non-whitespace character after the closing ")", the GameTree)
	# @throws ParserError if no Node follows the "(" or if the closing ")" is missing
	@staticmethod
	def create(s,start):
		assert GameTree.fits(s,start)
		res=GameTree()

		i=skipWhitespace(s,start+1)
		if not Node.fits(s,i):
			raise ParserError("expected a Node starting with ';'",s,i)

		y=None # the most recently parsed Node of the main sequence
		while Node.fits(s,i):
			i,x=Node.create(s,i)
			res.nodes.append(x)
			if y: y.addChild(x)
			x.setParent(y)
			y=x

		while GameTree.fits(s,i):
			i,x=GameTree.create(s,i)
			res.branches.append(x)
			# a GameTree returned by create() always has at least one Node
			subroot=x.getNode(0)
			subroot.setParent(y)
			if y: y.addChild(subroot)

		if i>=len(s) or s[i]!=")":
			raise ParserError("expected end of the GameTree marked by ')'",s,i)
		i=skipWhitespace(s,i+1)
		return (i,res)

	## Expand multiple games into distinct trees and yield the root Node of each.
	def listGames(self):
		for node in self._listGINodes():
			yield self._buildSubtree(node)

	## Return the i-th Node of the main sequence, or None when i is negative or too large.
	def getNode(self,i):
		if 0<=i<len(self.nodes):
			return self.nodes[i]
		return None

	## Build a new tree containing the provided Node and return the root Node of the tree.
	#
	# Ancestor nodes are copied, descendants are shared.
	def _buildSubtree(self,seedNode):
		node=seedNode.copy()

		# walk up to the root, each copied ancestor keeps just the one child leading to seedNode
		while node.parent:
			newNode=node.parent.copy()
			node.parent=newNode
			newNode.setChildren([node])
			node=newNode

		return node

	## Find and yield Game Info nodes, those of the main sequence first, then those of the branches.
	def _listGINodes(self):
		for node in self.nodes:
			if node.isGINode():
				yield node
		for tree in self.branches:
			yield from tree._listGINodes()
src/sgfParser/node.py
Show inline comments
 
from . import skipWhitespace, ParserError
 
from . import skipWhitespace, ParserWarning
 
from .property import Property, GAME_INFO
 

	
 

	
 
## Single node of a game tree: named properties plus links to the parent and the child Nodes.
class Node:
	def __init__(self):
		self.properties=dict() # property name -> Property
		self.parent=None
		self.children=[]

	## Tell whether a Node can start at s[i], i.e. s[i] is ";".
	@staticmethod
	def fits(s,i):
		return i<len(s) and s[i]==";"

	## Parse a Node starting at s[start].
	#
	# @param s the text being parsed
	# @param start index of the leading ";", asserted
	# @return (index following the last property of the node, the Node)
	# @throws ParserWarning if a property name occurs in the node for the second time
	@staticmethod
	def create(s,start):
		assert Node.fits(s,start)
		res=Node()

		i=skipWhitespace(s,start+1)
		while Property.fits(s,i):
			i,x=Property.create(s,i)
			if x.name in res.properties:
				print(res.properties) # NOTE(review): looks like debugging output, consider removing
				# !! raise or log or ignore
				raise ParserWarning('duplicate "{0}" property in a node. second value ignored'.format(x.name),s,i)
			else:
				res.properties[x.name]=x
		return (i,res)

	## Tell whether any property of the node is a game info property.
	def isGINode(self):
		return any(prop.type==GAME_INFO for prop in self.properties.values())

	## Store value under the property name, replacing a previous value if there is one.
	def setProperty(self,name,value):
		self.properties[name]=value
		# !! check the type of value

	def setParent(self,node):
		self.parent=node

	def setChildren(self,children):
		self.children=children

	## Append node to the children unless it is there already. Returns node either way.
	def addChild(self,node):
		if node in self.children: return node
		self.children.append(node)
		return node

	## Remove node from the children and return it, or return None if it is not a child.
	def removeChild(self,node):
		if node not in self.children:
			return None
		self.children.remove(node)
		return node

	## Remove and return the child at index i, or return None if there is no such child.
	#
	# Negative indices count from the end, -len(children) being the first child.
	def removeChildAt(self,i):
		if -len(self.children)<=i<len(self.children):
			return self.children.pop(i)
		return None

	## Create a copy of the Node, with deep copied properties and shallow copied parent and children.
	def copy(self):
		res=Node()
		res.properties={k: v.copy() for (k,v) in self.properties.items()}
		res.parent=self.parent
		res.setChildren(self.children[:])
		return res

	## Return the Property stored under name, or None if the node doesn't have it.
	def getProperty(self,name):
		return self.properties.get(name)

	## Returns textual representation of the Node itself, but disregards its children.
	def __str__(self):
		return ";" + "".join(str(p) for p in self.properties.values())
src/sgfParser/property.py
Show inline comments
 
import re
 
from . import skipWhitespace, ParserError
 

	
 

	
 
# Property type constants, see Property.type.
# Type of the properties carrying information about the game.
GAME_INFO=1
# Type of every other property.
UNKNOWN=99
 

	
 

	
 
## Pair of values forming a composed property value, written as "a:b".
class Composed:
	def __init__(self,a=None,b=None):
		self.a,self.b=a,b

	## Return both parts joined by a colon.
	def __str__(self):
		parts=(self.a,self.b)
		return "{0}:{1}".format(*parts)
 

	
 

	
 
## Board coordinates, a column and a row, both counted from 0.
class Point:
	def __init__(self,c,r):
		self.r=r
		self.c=c

	## Yield the column and then the row, so that a Point can be unpacked like a pair.
	def __iter__(self):
		yield self.c
		yield self.r

	## Return the letter for a coordinate: 0-25 map to "a"-"z", 26 and above continue from "A".
	@staticmethod
	def _letter(k):
		if k<26: return chr(ord("a")+k)
		return chr(ord("A")+k-26)

	## Return the two letter form of the point, column first.
	def __str__(self):
		return Point._letter(self.c)+Point._letter(self.r)
 

	
 

	
 
## Metatype matching one of the provided types.
#
# Returns the first match, so the order is important.
# A variant is skipped when it raises a ParserError.
#
# @param vTypes value parsers, each called as vType(s,start) and returning (next index, value)
# @return a parser trying the variants in the given order
# @throws ParserError raised by the returned parser when no variant matches
def choose(*vTypes):
	def f(s,start):
		for vType in vTypes:
			try:
				i,x=vType(s,start)
				return (i,x)
			except ParserError: pass
		raise ParserError("no variant of a 'choose' property value matched",s,start)
	return f
 

	
 

	
 
## Tell whether a single property value can start at s[i], i.e. s[i] is "[".
def singletonFits(s,i):
	if i>=len(s):
		return False
	return s[i]=="["
 

	
 

	
 
## Tell whether a single property value ends at s[i], i.e. s[i] is "]".
def singletonEnds(s,i):
	if i>=len(s):
		return False
	return s[i]=="]"
 

	
 

	
 
## Metatype for a single value enclosed in square brackets.
#
# @param vType parser of the inner value, called as vType(s,i) and returning (next index, value)
# @return a parser returning (index of the first non-whitespace character after "]", the value)
# @throws ParserError raised by the returned parser when the opening "[" or the closing "]" is missing
def singleton(vType):
	def f(s,start):
		if not singletonFits(s,start):
			raise ParserError("expected a property value starting with '['",s,start)
		i,x=vType(s,start+1)
		if not singletonEnds(s,i):
			raise ParserError("expected a property value ending with ']'",s,i)
		i=skipWhitespace(s,i+1)
		return (i,x)
	return f
 

	
 

	
 
## Metatype for a list of one or more bracketed values of the same type.
#
# @param vType parser of a single inner value
# @param allowEmpty whether a lone "[]" is accepted and parsed as an empty list
# @return a parser returning (index following the last value, list of the values)
# @throws ParserError raised by the returned parser when no "[" starts the list or a value is malformed
def listOf(vType,allowEmpty=False):
	def f(s,start):
		i=start
		if not singletonFits(s,i):
			raise ParserError("expected a property value starting with '['",s,i)
		if singletonEnds(s,i+1) and allowEmpty:
			i=skipWhitespace(s,i+2)
			return (i,[])
		single=singleton(vType)
		# the first value is mandatory, more are read for as long as another "[" follows
		i,x=single(s,i)
		res=[x]
		while singletonFits(s,i):
			i,x=single(s,i)
			res.append(x)
		return (i,res)
	return f
 

	
 

	
 
## Metatype for two values separated by a colon.
#
# @param vTypeA parser of the value before the colon
# @param vTypeB parser of the value after the colon
# @return a parser returning (index where vTypeB stopped, Composed of both values)
# @throws ParserError raised by the returned parser when no ":" follows the first value
def compose(vTypeA,vTypeB):
	def f(s,start):
		i,a=vTypeA(s,start)
		if i>=len(s) or s[i]!=":":
			raise ParserError("a composed property value separated by ':' expected",s,i)
		i,b=vTypeB(s,i+1)
		return (i,Composed(a,b))
	return f
 

	
 

	
 
## Parse an integer with an optional sign.
#
# @return (index following the number, the value as int)
# @throws ParserError if no number starts at s[start]
def number(s,start):
	r=re.compile(r"(\+|-|)\d+")
	m=r.match(s,start)
	# raw string keeps the backslashes of the message without relying on invalid escape sequences
	if m is None: raise ParserError(r"expected a number matching '(\+|-|)\d+'",s,start)
	res=int(m.group(0))
	return (m.end(),res)
 

	
 

	
 
## Parse a decimal number with an optional sign and an optional fractional part.
#
# @return (index following the number, the value as float)
# @throws ParserError if no number starts at s[start]
def real(s,start):
	r=re.compile(r"(\+|-|)\d+(\.\d+)?")
	m=r.match(s,start)
	# raw string keeps the backslashes of the message without relying on invalid escape sequences
	if m is None: raise ParserError(r"expected a real number matching '(\+|-|)\d+(\.\d+)?'",s,start)
	res=float(m.group(0))
	return (m.end(),res)
 

	
 

	
 
## Parse a double value, the digit "1" or "2".
#
# @return (index following the digit, the value as int)
# @throws ParserError if neither "1" nor "2" is at s[start]
def double(s,start):
	r=re.compile(r"1|2")
	m=r.match(s,start)
	if m is None: raise ParserError("expected a double value, either '1' or '2'",s,start)
	res=int(m.group(0))
	return (m.end(),res)
 

	
 

	
 
## Parse a color value, the letter "B" or "W".
#
# @return (index following the letter, the letter)
# @throws ParserError if neither "B" nor "W" is at s[start]
def color(s,start):
	r=re.compile(r"B|W")
	m=r.match(s,start)
	if m is None: raise ParserError("expected a color value, either 'B' or 'W'",s,start)
	return (m.end(),m.group(0))
 

	
 

	
 
## Create a parser of a text value.
#
# The text ends at an unescaped "]", or at an unescaped ":" if composed is set.
# A backslash escapes the next character; an escaped linebreak is dropped.
# Whitespace other than a preserved linebreak is turned into a space.
#
# @param simple if True, linebreaks are turned into spaces too, otherwise they are kept as "\n"
# @param composed whether an unescaped ":" terminates the text as well
# @return a parser returning (index of the terminating character, the decoded text);
# the index is len(s) if the text is not terminated
def text(simple=True,composed=False):
	def f(s,start):
		chunks=[]
		esc=False
		lastC=""
		end=max(start,len(s)) # used when no terminator is found
		# index directly instead of slicing, a slice would copy the rest of s for every value
		for i in range(start,len(s)):
			c=s[i]
			if esc:
				if c!="\n" and c!="\r": chunks.append(c)
				esc=False
			elif (c=="\n" and lastC=="\r") or (c=="\r" and lastC=="\n"):
				# the second character of a two-character linebreak, already handled with the first one
				# forget it, so that a linebreak right after is not taken for another second half
				lastC=""
				continue
			elif (c=="\r" or c=="\n") and not simple:
				chunks.append("\n")
			elif c.isspace():
				chunks.append(" ")
			elif c=="\\":
				esc=True
			elif c=="]" or (c==":" and composed):
				end=i
				break
			else:
				chunks.append(c)
			lastC=c
		return (end,"".join(chunks))
	return f
 

	
 

	
 
## Parse an empty value: consume nothing and return an empty string.
def empty(s,start):
	value=""
	return (start,value)
 

	
 

	
 
## Parse a raw value up to the first unescaped "]", leaving escape sequences as they are.
#
# @return (index of the terminating "]", the raw text before it);
# the index is len(s) if the value is not terminated
def anything(s,start):
	esc=False
	end=max(start,len(s)) # used when no terminator is found, including an empty input
	# index directly instead of slicing, a slice would copy the rest of s for every value
	for i in range(start,len(s)):
		c=s[i]
		if esc: esc=False
		elif c=="\\": esc=True
		elif c=="]":
			end=i
			break
	return (end,s[start:end])
 

	
 

	
 
# go specific
 
## Parse a point, two letters giving the column and the row, or an empty value meaning a pass.
#
# Letters "a"-"z" stand for coordinates 0-25, "A"-"Z" for 26-51.
#
# @return (index following the point, Point), or (start, empty tuple) for an empty value
# @throws ParserError if the pattern doesn't match at s[start]
def point(s,start):
	r=re.compile(r"[a-zA-Z]{2}|") # !! limit to board size
	m=r.match(s,start)
	if m is None: raise ParserError("expected a point value matching '[a-zA-Z]{2}|'",s,start)
	if m.group(0)=="": # pass, !! tt
		return (m.end(),tuple())
	col=m.group(0)[0]
	row=m.group(0)[1]
	col=ord(col)-(ord("a") if "a"<=col<="z" else ord("A")-26)
	row=ord(row)-(ord("a") if "a"<=row<="z" else ord("A")-26)
	return (m.end(),Point(col,row))
 

	
 
# Moves and stones are parsed by the same function as points.
move=point
stone=point
 

	
 

	
 
class Property:
 
	identRegexp=re.compile(r"[A-Z]+")
 

	
 
	def __init__(self):
 
		self.name=""
 
		self.value=""
 

	
 
	@staticmethod
 
	def fits(s,i):
 
		return i<len(s) and s[i].isupper()
 

	
 
	## Parse a Property, an identifier followed by its value, starting at s[start].
	#
	# @param s the text being parsed
	# @param start index of the first character of the identifier, asserted to be uppercase
	# @return (index of the first non-whitespace character after the value, the Property),
	# or (start, None) if the value is parsed as None
	@staticmethod
	def create(s,start):
		assert Property.fits(s,start)
		res=Property()
		i,res.name=Property.ident(s,start)
		i=skipWhitespace(s,i)
		i,x=Property.createValue(s,i,res.name)
		if x is None:
			print('error when parsing property "{0}" at position {1}'.format(res.name,i))
			return (start,None)
		res.value=x
		i=skipWhitespace(s,i)
		return (i,res)
 

	
 
	@staticmethod
 
	def ident(s,start):
 
		r=re.compile(r"[A-Z]+")
 
		m=r.match(s,start)
 
		if m is None: return (start,None)
 
		m=Property.identRegexp.match(s,start)
 
		if m is None: raise ParserError("expected a property identifier matching '[A-Z]+'",s,start)
 
		return (m.end(),m.group())
 

	
 
	## Parse the value of the property called name, starting at s[start].
	#
	# A property listed in Property.patterns is parsed by its pattern. Any other one is read
	# as a list of raw values, or a single raw value.
	#
	# @return (next index, the value), as returned by the parser used
	@staticmethod
	def createValue(s,start,name):
		if name in Property.patterns:
			return Property.patterns[name](s,start)
		else:
			# !! raise or log or ignore
			# choose() takes the parsers themselves and returns a parser, which is applied only then
			return choose(listOf(anything), singleton(anything))(s,start)
 

	
 
	## Type of the property: GAME_INFO for the game info identifiers, UNKNOWN for anything else.
	@property
	def type(self):
		gameInfo={"AN","BR","BT","CP","DT","EV","GN","GC","ON","OT","PB","PC","PW","RE","RO","RU","SO","TM","US","WR","WT"}
		return GAME_INFO if self.name in gameInfo else UNKNOWN
 

	
 
	def copy(self):
 
		res=Property()
 
		res.name=self.name
 
		res.value=self.value if not isinstance(self.value,list) else self.value[:]
 
		return res
 

	
 
	def __str__(self):
 
		val="[{0}]".format(self.value) if not isinstance(self.value,list) else "".join("[{0}]".format(x) for x in self.value)
 
		return "{0}{1}".format(self.name,val)
 

	
 
	patterns={
 
		"B":singleton(move),
 
		"KO":singleton(empty),
 
		"MN":singleton(number),
 
		"W":singleton(move),
 
		"AB":listOf(stone), #
 
		"AE":listOf(point), #
 
		"AW":listOf(stone), #
 
		"PL":singleton(color),
 
		"C":singleton(text(simple=False)),
 
		"DM":singleton(double),
 
		"GB":singleton(double),
 
		"GW":singleton(double),
 
		"HO":singleton(double),
 
		"N":singleton(text()),
 
		"UC":singleton(double),
 
		"V":singleton(real),
 
		"BM":singleton(double),
 
		"DO":singleton(empty),
 
		"IT":singleton(empty),
 
		"TE":singleton(double),
 
		"AR":listOf(compose(point,point)), #
 
		"CR":listOf(point), #
 
		"DD":listOf(point,allowEmpty=True), #
 
		"LB":listOf(compose(point,text())), #
 
		"LN":listOf(compose(point,point)), #
 
		"MA":listOf(point), #
 
		"SL":listOf(point), #
 
		"SQ":listOf(point), #
 
		"TR":listOf(point), #
 
		"AP":singleton(compose(text(composed=True),text())), #
src/tests/testSgfParser.py
Show inline comments
 
import unittest
 
from unittest import TestCase
 
import os
 

	
 
from sgfParser.collection import Collection
 
from sgfParser.property import Property
 

	
 

	
 
# Directory "data" next to this file, the tests read their SGF files from it.
dataDir=os.path.join(os.path.dirname(__file__), "data")
 

	
 

	
 
class TestProperty(TestCase):
	## Property.create asserts that an identifier starts at the given index and reads the name.
	def testName(self):
		# create() no longer returns (start,None) for a text without an identifier, it asserts instead
		with self.assertRaises(AssertionError):
			Property.create("[99]",0)
		with self.assertRaises(AssertionError):
			Property.create("99[99]",0)

		i,prop=Property.create("MN[99]",0)
		self.assertNotEqual((i,prop), (0,None))
		self.assertEqual((i,prop.name), (6,"MN"))
 

	
 

	
 
class TestCollection(TestCase):
	## Game info in two separate variations is listed as two games sharing the root node.
	def testSubtrees(self):
		collection=Collection("""
 
(;B[aa]
 
	(;W[ab]PB[Some Black]PW[Some White];B[ac])
 
	(;W[bb]PB[Other Black]PW[Other White])
 
)""")
		games=list(collection.listGames())

		self.assertEqual(len(games),2)
		# the order of the properties within a node is not fixed, hence the alternation
		firstExport=games[0].export()
		self.assertRegex(firstExport, r"^\(;B\[aa];(PB\[Some Black]|PW\[Some White]|W\[ab]){3};B\[ac]\)$")
		secondExport=games[1].export()
		self.assertRegex(secondExport, r"^\(;B\[aa];(PB\[Other Black]|PW\[Other White]|W\[bb]){3}\)$")

	## The smallest game tree, a single node without properties, is parsed without an error.
	def testEmptySgf(self):
		Collection("(;)")

	## The file simple.sgf from the data directory is parsed without an error.
	def testSimpleSgf(self):
		path=os.path.join(dataDir, "simple.sgf")
		with open(path) as sgfFile:
			Collection(sgfFile.read())

	## The file kogos.sgf from the data directory is parsed without an error.
	def testComplexSgf(self):
		path=os.path.join(dataDir, "kogos.sgf")
		with open(path) as sgfFile:
			Collection(sgfFile.read())
 

	
 
# Run the tests when the file is executed directly.
if __name__ == '__main__':
	unittest.main()
0 comments (0 inline, 0 general)