diff --git a/src/sgfParser/propValues.py b/src/sgfParser/propValues.py old mode 100644 new mode 100755 --- a/src/sgfParser/propValues.py +++ b/src/sgfParser/propValues.py @@ -7,6 +7,15 @@ class Regexp: number=re.compile(r"(\+|-|)\d+") real=re.compile(r"(\+|-|)\d+(\.\d+)?") point=re.compile(r"[a-zA-Z]{2}|") + text=re.compile(r"(?:.*?[^\\])??(?:\\\\)*(?=])", re.DOTALL) + composedText=re.compile(r"(?:.*?[^\\])??(?:\\\\)*(?=]|:)", re.DOTALL) + + class Text: + softBreaks=re.compile(r"(^|[^\\])((\\\\)*)\\((\n\r)|(\r\n)|\r|\n)") + whitespace=re.compile(r"[\t\f\v]") + simpleWhitespace=re.compile(r"[\t\f\v\n\r]") + removeSlashes=re.compile(r"(^|[^\\])((\\\\)*)\\($|[^\\])") + unescapeSlashes=re.compile(r"\\\\") class Composed: @@ -124,27 +133,18 @@ def color(s,start): def text(simple=True,composed=False): def f(s,start): - res="" - esc=False - lastC="" - i=start - for i,c in enumerate(s[start:],start): - if esc: - if c!="\n" and c!="\r": res+=c - esc=False - elif (c=="\n" and lastC=="\r") or (c=="\r" and lastC=="\n"): pass - elif c=="\r" or c=="\n" and not simple: - res+="\n" - elif c.isspace(): - res+=" " - elif c=="\\": - esc=True - elif c=="]" or (c==":" and composed): - break - else: - res+=c - lastC=c - return (i,res) + regexps=Regexp.Text + m=Regexp.composedText.match(s,start) if composed else Regexp.text.match(s,start) + res=m.group(0) + res=regexps.softBreaks.sub(r"\1\2",res) # remove soft line breaks + if simple: + res=regexps.simpleWhitespace.sub(" ",res) # convert whitespace to spaces, no escapes + else: + res=regexps.whitespace.sub(" ",res) # convert whitespace to spaces, no escapes + res=regexps.removeSlashes.sub(r"\1\2\4",res) + res=regexps.unescapeSlashes.sub(r"\\",res) # unescape slashes + + return (m.end(),res) return f @@ -153,6 +153,7 @@ def empty(s,start): return (start,"") def anything(s,start): esc=False + i=start for i,c in enumerate(s[start:],start): if esc: esc=False elif c=="\\": esc=True diff --git a/src/tests/testSgfParser.py b/src/tests/testSgfParser.py --- a/src/tests/testSgfParser.py +++ b/src/tests/testSgfParser.py @@ -6,6 +6,7 @@ import os from sgfParser import strRowCol from sgfParser.collection import Collection from sgfParser.property import Property +from sgfParser.propValues import text,compose dataDir=os.path.join(os.path.dirname(__file__), "data") @@ -37,6 +38,21 @@ class TestProperty(TestCase): self.assertNotEqual((i,prop), (0,None)) self.assertEqual((i,prop.name), (6,"MN")) + def testText(self): + s=r"""[abc\ +def +ghi]""" + self.assertEqual(text()(s,1)[1], "abcdef ghi") + self.assertEqual(text(False)(s,1)[1], "abcdef\nghi") + + s="""[m\\no\\\tpqr\\]\\\\]""" + self.assertEqual(text()(s,1)[1], "mno pqr]\\") + self.assertEqual(text(False)(s,1)[1], "mno pqr]\\") + + s="""[abc:def]""" + parsed=compose(text(composed=True),text(composed=True))(s,1) + self.assertEqual(str(parsed[1]), "abc:def") + class TestCollection(TestCase): def testSubtrees(self):