# Raw extent of a text value: a run of ordinary characters and
# backslash-escaped characters.  The match stops in front of the first
# unescaped terminator and can never fail: with no terminator it simply runs
# to the end of the input (a dangling final backslash is left unconsumed).
_textSpan = re.compile(r"(?:[^\\\]]|\\.)*", re.DOTALL)
_composedTextSpan = re.compile(r"(?:[^\\\]:]|\\.)*", re.DOTALL)

# One token of the clean-up pass.  Alternatives are tried in this order:
#   group 1 - soft line break (backslash followed by a linebreak)
#   group 2 - any other backslash-escaped character
#   group 3 - a bare whitespace character
# Tokenising left to right handles adjacent escapes such as "\a\b", which
# patterns that must consume the preceding character cannot do.
_textToken = re.compile(r"\\(\n\r|\r\n|\r|\n)|\\(.)|([\t\f\v\n\r])", re.DOTALL)


def text(simple=True, composed=False):
    """Build a parser for a backslash-escaped text value.

    Presumably implements the SGF ``Text`` / ``SimpleText`` value types
    (the module lives under ``sgfParser``) -- confirm against the spec.

    Args:
        simple: when true, bare linebreaks are turned into spaces; when
            false they are kept exactly as they appear in the input.
        composed: when true, an unescaped ``:`` ends the value as well as
            an unescaped ``]``.

    Returns:
        A function ``f(s, start)`` returning ``(end, value)``.  ``end`` is
        the index of the unescaped terminator that stopped the scan, or the
        index where scanning had to stop if the input has no terminator.
        ``value`` is the text between ``start`` and ``end`` with soft line
        breaks removed, tab / form feed / vertical tab replaced by a space
        (escaped or not), and every other escaped character replaced by
        the character itself.
    """
    span = _composedTextSpan if composed else _textSpan

    def clean(token):
        escaped = token.group(2)
        if escaped is not None:
            # Escaping does not protect whitespace from being normalised.
            return " " if escaped in "\t\f\v" else escaped
        bare = token.group(3)
        if bare is None:
            # Neither group 2 nor 3 matched: this is a soft line break.
            return ""
        if not simple and bare in "\n\r":
            return bare
        return " "

    def f(s, start):
        m = span.match(s, start)  # always matches, possibly the empty string
        return (m.end(), _textToken.sub(clean, m.group(0)))

    return f


def empty(s, start):
    """Parse an empty value: consume nothing and return ``(start, "")``."""
    return (start, "")