Changeset - 5982aac96259
[Not reviewed]
default
0 2 0
Laman - 8 years ago 2017-02-08 22:36:20

optimized text parsing
2 files changed with 38 insertions and 21 deletions:
0 comments (0 inline, 0 general)
src/sgfParser/propValues.py
Show inline comments
 
modified file chmod 100644 => 100755
 
@@ -7,6 +7,15 @@ class Regexp:
 
	number=re.compile(r"(\+|-|)\d+")
 
	real=re.compile(r"(\+|-|)\d+(\.\d+)?")
 
	point=re.compile(r"[a-zA-Z]{2}|")
 
	text=re.compile(r"(?:.*?[^\\])??(?:\\\\)*(?=])", re.DOTALL)
 
	composedText=re.compile(r"(?:.*?[^\\])??(?:\\\\)*(?=]|:)", re.DOTALL)
 

	
 
	# Compiled patterns for post-processing a text value after it has been matched.
	class Text:
 
		# A single backslash directly followed by a line break (\n\r, \r\n, \r or \n).
		# Group 1 is the character before the escape sequence (or start of string),
		# group 2 is any run of doubled backslashes directly in front of that backslash.
		softBreaks=re.compile(r"(^|[^\\])((\\\\)*)\\((\n\r)|(\r\n)|\r|\n)")
 
		# Tab, form feed or vertical tab (line breaks are not included).
		whitespace=re.compile(r"[\t\f\v]")
 
		# Same as `whitespace`, plus \n and \r.
		simpleWhitespace=re.compile(r"[\t\f\v\n\r]")
 
		# A single backslash that is followed by end of string or a non-backslash character.
		# Groups 1 and 2 capture what precedes it (as in softBreaks), group 4 what follows it.
		removeSlashes=re.compile(r"(^|[^\\])((\\\\)*)\\($|[^\\])")
 
		# Two consecutive backslashes.
		unescapeSlashes=re.compile(r"\\\\")
 

	
 

	
 
class Composed:
 
@@ -124,27 +133,18 @@ def color(s,start):
 

	
 
# Build a parser function for a text value.
#   simple   -- selects how line breaks are treated: the regex code path replaces
#               \n and \r with spaces when true and leaves them alone when false
#   composed -- when true, ":" ends the value in addition to "]"
# Returns f(s,start) -> (index where parsing stopped, processed text).
def text(simple=True,composed=False):
 
	def f(s,start):
 
		res=""
 
		esc=False
 
		lastC=""
 
		i=start
 
		for i,c in enumerate(s[start:],start):
 
			if esc:
 
				# an escaped line break is dropped; any other escaped character is kept literally
				if c!="\n" and c!="\r": res+=c
 
				esc=False
 
			# second character of a \r\n or \n\r pair: already handled, skip it
			elif (c=="\n" and lastC=="\r") or (c=="\r" and lastC=="\n"): pass
 
			# NOTE(review): "and" binds tighter than "or", so "\r" takes this branch
			# even when simple is true, while "\n" only does when simple is false -- confirm intent
			elif c=="\r" or c=="\n" and not simple:
 
				res+="\n"
 
			elif c.isspace():
 
				res+=" "
 
			elif c=="\\":
 
				esc=True
 
			# unescaped terminator: stop without consuming it
			elif c=="]" or (c==":" and composed):
 
				break
 
			else:
 
				res+=c
 
			lastC=c
 
		# NOTE(review): as listed, this return makes every following statement in f unreachable.
		# The listing looks like a diff rendered without +/- markers (character loop above = removed
		# version, regex code below = its replacement) -- confirm which version is live.
		return (i,res)
 
		regexps=Regexp.Text
 
		# NOTE(review): match() returns None when no terminator can be found after start;
		# m.group(0) below would then raise AttributeError -- confirm callers guarantee a terminator
		m=Regexp.composedText.match(s,start) if composed else Regexp.text.match(s,start)
 
		res=m.group(0)
 
		res=regexps.softBreaks.sub(r"\1\2",res) # remove soft line breaks
 
		if simple:
 
			res=regexps.simpleWhitespace.sub(" ",res) # convert whitespace to spaces, no escapes
 
		else:
 
			res=regexps.whitespace.sub(" ",res) # convert whitespace to spaces, no escapes
 
		# drop each lone escaping backslash, keeping the text on both sides of it
		res=regexps.removeSlashes.sub(r"\1\2\4",res)
 
		res=regexps.unescapeSlashes.sub(r"\\",res) # unescape slashes
 

	
 
		# m.end() is the index of the terminator, which the lookahead leaves unconsumed
		return (m.end(),res)
 
	return f
 

	
 

	
 
@@ -153,6 +153,7 @@ def empty(s,start): return (start,"")
 

	
 
def anything(s,start):
 
	esc=False
 
	i=start
 
	for i,c in enumerate(s[start:],start):
 
		if esc: esc=False
 
		elif c=="\\": esc=True
src/tests/testSgfParser.py
Show inline comments
 
@@ -6,6 +6,7 @@ import os
 
from sgfParser import strRowCol
 
from sgfParser.collection import Collection
 
from sgfParser.property import Property
 
from sgfParser.propValues import text,compose
 

	
 

	
 
dataDir=os.path.join(os.path.dirname(__file__), "data")
 
@@ -37,6 +38,21 @@ class TestProperty(TestCase):
 
		self.assertNotEqual((i,prop), (0,None))
 
		self.assertEqual((i,prop.name), (6,"MN"))
 

	
 
	# Checks text(): soft line breaks, simple vs. non-simple line breaks, escapes, composed values.
	def testText(self):
 
		# Backslash + line break is a soft break; the following bare line break is expected to
		# become a space with the default (simple) parser and to stay "\n" with text(False).
		# NOTE(review): as listed, this literal also contains whitespace-only lines between
		# "abc\", "def" and "ghi]"; they look like a rendering artifact, since the expected
		# values below have no room for them -- confirm against the repository.
		s=r"""[abc\
 
def
 
ghi]"""
 
		self.assertEqual(text()(s,1)[1], "abcdef ghi")
 
		self.assertEqual(text(False)(s,1)[1], "abcdef\nghi")
 

	
 
		# Escapes: \n(letter) -> n, escaped tab -> space, \] -> literal "]" that does not end
		# the value, doubled backslash -> one backslash; both modes expect the same result.
		s="""[m\\no\\\tpqr\\]\\\\]"""
 
		self.assertEqual(text()(s,1)[1], "mno pqr]\\")
 
		self.assertEqual(text(False)(s,1)[1], "mno pqr]\\")
 

	
 
		# Two composed text parsers chained with compose(); the result is expected to
		# render back as "abc:def".
		s="""[abc:def]"""
 
		parsed=compose(text(composed=True),text(composed=True))(s,1)
 
		self.assertEqual(str(parsed[1]), "abc:def")
 

	
 

	
 
class TestCollection(TestCase):
 
	def testSubtrees(self):
0 comments (0 inline, 0 general)