Changeset - 0ee71f3564f4
[Not reviewed]
default
0 5 0
Laman - 8 years ago 2017-01-28 07:34:32

the parser is more predictive and reports errors
5 files changed with 119 insertions and 74 deletions:
0 comments (0 inline, 0 general)
src/sgfParser/__init__.py
Show inline comments
 
@@ -4,8 +4,24 @@ def skipWhitespace(s,start):
 
	return i
 

	
 

	
 
## Compute the 1-based number of the line of s which contains index i.
#
# Lines are split with str.splitlines(True), so each line's terminator counts
# towards that line's length and a terminator belongs to the line it ends.
# An index at or past the end of s is reported on the last line; an empty s
# yields 1.
#
# @param s the whole parsed string
# @param i 0-based index into s
# @return 1-based line number
def lineNumber(s,i):
	consumed=0 # number of characters in the lines seen so far
	row=0
	for (row,line) in enumerate(s.splitlines(True)):
		consumed+=len(line)
		# Strict comparison: index i lies on this line only if the line's end
		# is beyond i. With >= the first character of a line was attributed
		# to the previous line.
		if consumed>i: break
	return row+1
 

	
 

	
 
class ParserError(Exception):
 
	def __init__(self,line,col,message):
 
		self.line=line
 
		self.col=col
 
	def __init__(self,message,s,i):
 
		# self.line=line
 
		# self.col=col
 
		# !! check for i<len(s)
 
		print(message)
 
		print(s[i:])
 
		self.message=message
 

	
 

	
 
## Subclass of ParserError which adds no behavior of its own.
#
# It exists as a distinct type, so handlers can tell it apart from other ParserErrors.
class ParserWarning(ParserError):
 
	pass
src/sgfParser/collection.py
Show inline comments
 
from sgfParser.node import Node
 
from . import skipWhitespace
 
from . import skipWhitespace, ParserError
 
 
 
class Collection:
 
	def __init__(self,s):
 
		self.gameTrees=[]
 
		i,x=GameTree.create(s,0)
 
		if x is None:
 
			print("error when parsing Collection")
 
			return
 
		while x is not None:
 
		i=skipWhitespace(s,0)
 
		if i>=len(s): return
 
		elif not GameTree.fits(s,i):
 
			raise ParserError("expected a GameTree starting with '('",s,i)
 
		while GameTree.fits(s,i):
 
			i,x=GameTree.create(s,i)
 
			self.gameTrees.append(x)
 
			i,x=GameTree.create(s,i)
 
		if i<len(s):
 
			raise ParserError("expected EOF",s,i)
 
 
	def listGames(self):
 
		for tree in self.gameTrees:
 
@@ -24,38 +26,38 @@ class GameTree:
 
		self.branches=[]
 
 
	@staticmethod
 
	def fits(s,i):
 
		return i<len(s) and s[i]=="("
 
 
	@staticmethod
 
	def create(s,start):
 
		assert GameTree.fits(s,start)
 
		res=GameTree()
 
		i=skipWhitespace(s,start)
 
		if i>=len(s) or s[i]!="(":
 
			# print("error when parsing GameTree")
 
			return (start,None)
 
		i,x=Node.create(s,i+1)
 
		if x is None:
 
			# print("error when parsing GameTree")
 
			return (i,None)
 
 
		i=skipWhitespace(s,start+1)
 
		if not Node.fits(s,i):
 
			raise ParserError("expected a Node starting with ';'",s,i)
 
 
		y=None
 
		while x is not None:
 
		while Node.fits(s,i):
 
			i,x=Node.create(s,i)
 
			res.nodes.append(x)
 
			if y: y.addChild(x)
 
			x.setParent(y)
 
			y=x
 
			i=skipWhitespace(s,i)
 
			i,x=Node.create(s, i)
 
		i=skipWhitespace(s,i)
 
		i,x=GameTree.create(s,i)
 
		while x is not None:
 
 
		while GameTree.fits(s,i):
 
			i,x=GameTree.create(s,i)
 
			res.branches.append(x)
 
			subroot=x.getNode(0)
 
			if subroot:
 
				subroot.setParent(y)
 
			subroot.setParent(y)
 
			if y: y.addChild(subroot)
 
			i=skipWhitespace(s,i)
 
			i,x=GameTree.create(s,i)
 
		if s[i]!=")":
 
			# print("error when parsing GameTree")
 
			return (i,None)
 
		return (i+1,res)
 
		if i>=len(s) or s[i]!=")":
 
			raise ParserError("expected end of the GameTree marked by ')'",s,i)
 
		i=skipWhitespace(s,i+1)
 
		return (i,res)
 
 
	## Expand multiple games into distinct GameTrees and yield each.
 
	def listGames(self):
src/sgfParser/node.py
Show inline comments
 
from . import skipWhitespace, ParserError
 
from . import skipWhitespace, ParserWarning
 
from .property import Property, GAME_INFO
 

	
 

	
 
@@ -9,21 +9,23 @@ class Node:
 
		self.children=[]
 

	
 
	@staticmethod
 
	def fits(s,i):
 
		return i<len(s) and s[i]==";"
 

	
 
	@staticmethod
 
	def create(s,start):
 
		assert Node.fits(s,start)
 
		res=Node()
 
		if s[start]!=";":
 
			# print("error when parsing Node")
 
			return (start,None)
 

	
 
		i=skipWhitespace(s,start+1)
 
		i,x=Property.create(s,start+1)
 
		while x is not None:
 
		while Property.fits(s,i):
 
			i,x=Property.create(s,i)
 
			if x.name in res.properties:
 
				print(res.properties)
 
				raise ParserError(0,0,'duplicate "{0}" property in node at position {1}. second value ignored'.format(x.name,start))
 
				# !! raise or log or ignore
 
				raise ParserWarning('duplicate "{0}" property in a node. second value ignored'.format(x.name),s,i)
 
			else:
 
				res.properties[x.name]=x
 
			i=skipWhitespace(s,i)
 
			i,x=Property.create(s,i)
 
		return (i,res)
 

	
 
	def isGINode(self):
src/sgfParser/property.py
Show inline comments
 
import re
 
from . import skipWhitespace, ParserError
 

	
 

	
 
GAME_INFO=1
 
@@ -28,36 +29,54 @@ class Point:
 
		return chr(a+self.c)+chr(a+self.r)
 

	
 

	
 
## Metatype matching one of the provided types.
 
#
 
# Returns the first match, so the order is important.
 
def choose(*vTypes):
 
	def f(s,start):
 
		for vType in vTypes:
 
			i,x=vType(s,start)
 
			if x is not None: return (i,x)
 
		return (start,None)
 
			try:
 
				i,x=vType(s,start)
 
				return (i,x)
 
			except ParserError: pass
 
		raise ParserError("no variant of a 'choose' property value matched",s,start)
 
	return f
 

	
 

	
 
## Tell whether s has an opening bracket "[" at position i.
#
# Returns False when i is at or past the end of s.
def singletonFits(s,i):
	if i>=len(s): return False
	return s[i]=="["
 

	
 

	
 
## Tell whether s has a closing bracket "]" at position i.
#
# Returns False when i is at or past the end of s.
def singletonEnds(s,i):
	if i>=len(s): return False
	return s[i]=="]"
 

	
 

	
 
def singleton(vType):
 
	def f(s,start):
 
		if s[start]!="[":
 
			return (start,None)
 
		if not singletonFits(s,start):
 
			raise ParserError("expected a property value starting with '['",s,start)
 
		i,x=vType(s,start+1)
 
		if x is None: return (start,None)
 
		if s[i]!="]":
 
			return (start,None)
 
		return (i+1,x)
 
		if not singletonEnds(s,i):
 
			raise ParserError("expected a property value ending with ']'",s,i)
 
		i=skipWhitespace(s,i+1)
 
		return (i,x)
 
	return f
 

	
 

	
 
def listOf(vType,allowEmpty=False):
 
	def f(s,start):
 
		res=[]
 
		i=start
 
		if not singletonFits(s,i):
 
			raise ParserError("expected a property value starting with '['",s,i)
 
		if singletonEnds(s,i+1) and allowEmpty:
 
			i=skipWhitespace(s,i+2)
 
			return (i,[])
 
		single=singleton(vType)
 
		i,x=single(s,start)
 
		while x!=None:
 
		i,x=single(s,i)
 
		res=[x]
 
		while singletonFits(s,i):
 
			i,x=single(s,i)
 
			res.append(x)
 
			i,x=single(s,i)
 
		if len(res)==0 and not allowEmpty: return (start,None)
 
		return (i,res)
 
	return f
 

	
 
@@ -65,9 +84,9 @@ def listOf(vType,allowEmpty=False):
 
def compose(vTypeA,vTypeB):
 
	def f(s,start):
 
		i,a=vTypeA(s,start)
 
		if a==None or s[i]!=":": return (start,None)
 
		if i>=len(s) or s[i]!=":":
 
			raise ParserError("a composed property value separated by ':' expected",s,i)
 
		i,b=vTypeB(s,i+1)
 
		if b==None: return start,None
 
		return (i,Composed(a,b))
 
	return f
 

	
 
@@ -75,7 +94,7 @@ def compose(vTypeA,vTypeB):
 
def number(s,start):
 
	r=re.compile(r"(\+|-|)\d+")
 
	m=r.match(s,start)
 
	if m is None: return (start,None)
 
	if m is None: raise ParserError("expected a number matching '(\+|-|)\d+'",s,start)
 
	res=int(m.group(0))
 
	return (m.end(),res)
 

	
 
@@ -83,7 +102,7 @@ def number(s,start):
 
def real(s,start):
 
	r=re.compile(r"(\+|-|)\d+(\.\d+)?")
 
	m=r.match(s,start)
 
	if m is None: return (start,None)
 
	if m is None: raise ParserError("expected a real number matching '(\+|-|)\d+(\.\d+)?'",s,start)
 
	res=float(m.group(0))
 
	return (m.end(),res)
 

	
 
@@ -91,7 +110,7 @@ def real(s,start):
 
def double(s,start):
 
	r=re.compile(r"1|2")
 
	m=r.match(s,start)
 
	if m is None: return (start,None)
 
	if m is None: raise ParserError("expected a double value, either '1' or '2'",s,start)
 
	res=int(m.group(0))
 
	return (m.end(),res)
 

	
 
@@ -99,7 +118,7 @@ def double(s,start):
 
def color(s,start):
 
	r=re.compile(r"B|W")
 
	m=r.match(s,start)
 
	if m is None: return (start,None)
 
	if m is None: raise ParserError("expected a color value, either 'B' or 'W'",s,start)
 
	return (m.end(),m.group(0))
 

	
 

	
 
@@ -108,6 +127,7 @@ def text(simple=True,composed=False):
 
		res=""
 
		esc=False
 
		lastC=""
 
		i=start
 
		for i,c in enumerate(s[start:],start):
 
			if esc:
 
				if c!="\n" and c!="\r": res+=c
 
@@ -144,7 +164,7 @@ def anything(s,start):
 
def point(s,start):
 
	r=re.compile(r"[a-zA-Z]{2}|") # !! limit to board size
 
	m=r.match(s,start)
 
	if m is None: return (start,None)
 
	if m is None: raise ParserError("expected a point value matching '[a-zA-Z]{2}|'",s,start)
 
	if m.group(0)=="": # pass, !! tt
 
		return (m.end(),tuple())
 
	col=m.group(0)[0]
 
@@ -158,29 +178,31 @@ stone=point
 

	
 

	
 
class Property:
 
	identRegexp=re.compile(r"[A-Z]+")
 

	
 
	def __init__(self):
 
		self.name=""
 
		self.value=""
 

	
 
	@staticmethod
 
	def fits(s,i):
 
		return i<len(s) and s[i].isupper()
 

	
 
	@staticmethod
 
	def create(s,start):
 
		assert Property.fits(s,start)
 
		res=Property()
 
		i,x=Property.ident(s,start)
 
		if x is None:
 
			return (start,None)
 
		res.name=x
 
		i,res.name=Property.ident(s,start)
 
		i=skipWhitespace(s,i)
 
		i,x=Property.createValue(s,i,res.name)
 
		if x is None:
 
			print('error when parsing property "{0}" at position {1}'.format(res.name,i))
 
			return (start,None)
 
		res.value=x
 
		i=skipWhitespace(s,i)
 
		return (i,res)
 

	
 
	@staticmethod
 
	def ident(s,start):
 
		r=re.compile(r"[A-Z]+")
 
		m=r.match(s,start)
 
		if m is None: return (start,None)
 
		m=Property.identRegexp.match(s,start)
 
		if m is None: raise ParserError("expected a property identifier matching '[A-Z]+'",s,start)
 
		return (m.end(),m.group())
 

	
 
	@staticmethod
 
@@ -188,8 +210,9 @@ class Property:
 
		if name in Property.patterns:
 
			return Property.patterns[name](s,start)
 
		else:
 
			print('warning, unknown property "{0}" at position {1}'.format(name,start))
 
			return singleton(anything)(s,start)
 
			# !! raise or log or ignore
 
			# print('warning, unknown property "{0}" at position {1}'.format(name,start))
 
			return choose(listOf(anything)(s,start), singleton(anything)(s,start))
 

	
 
	@property
 
	def type(self):
src/tests/testSgfParser.py
Show inline comments
 
@@ -11,8 +11,10 @@ dataDir=os.path.join(os.path.dirname(__f
 

	
 
class TestProperty(TestCase):
 
	def testName(self):
 
		self.assertEqual(Property.create("[99]",0), (0,None))
 
		self.assertEqual(Property.create("99[99]",0), (0,None))
 
		with self.assertRaises(AssertionError):
 
			Property.create("[99]",0)
 
		with self.assertRaises(AssertionError):
 
			Property.create("99[99]",0)
 

	
 
		i,prop=Property.create("MN[99]",0)
 
		self.assertNotEqual((i,prop), (0,None))
0 comments (0 inline, 0 general)