"""Experimental reader for SGF (Smart Game Format) game records.

The module works in three stages:

1. ``compressValues`` replaces every bracketed property value by a numbered
   placeholder (``[$0]``, ``[$1]``, ...), so the free-form text inside the
   values cannot confuse the later stages.
2. ``tokenize`` splits the compressed text into ``Token`` tuples.
3. ``collection`` and the functions it calls form a recursive descent
   recognizer for the resulting token stream.

Run as a script, it prints the compressed text of one SGF file followed by
its first 20 extracted values.
"""

import collections
import re
import sys

# File processed when the script is started without a command-line argument.
DEFAULT_SGF_PATH = "c:/Users/Laman/Documents/go/EuroGoTV1-x.sgf"

# One bracketed property value.  Inside the brackets a backslash escapes the
# character that follows it, so "\]" does not close the value and "\\" is a
# literal backslash.  The body may be empty ("B[]") and may span lines.
_VALUE_RE = re.compile(r"\[((?:[^\\\]]|\\.)*)\]", flags=re.DOTALL)


def compressValues(s):
    """Replace each bracketed value in *s* by a numbered placeholder.

    Args:
        s: Text containing ``[...]`` values.

    Returns:
        A ``(compressed, values)`` tuple.  ``compressed`` is *s* with the
        n-th value (counting from 0) replaced by ``[$n]``; ``values`` is the
        list of the original value bodies without their brackets, with
        escape sequences left untouched.
    """
    vals = []

    def _stash(match):
        # The placeholder number is the value's index in ``vals``.
        vals.append(match.group(1))
        return "[$%d]" % (len(vals) - 1)

    return _VALUE_RE.sub(_stash, s), vals


def main(path=DEFAULT_SGF_PATH):
    """Print the compressed form of the file at *path* and its first values.

    Args:
        path: Location of a UTF-8 encoded text file.
    """
    # The context manager closes the file even if reading fails.
    with open(path, encoding="utf8") as f:
        s = f.read()
    s, d = compressValues(s)
    print(s)
    print(d[:20])


#
# Tokenizer for the compressed text.
#

Token = collections.namedtuple('Token', ['typ', 'value', 'line', 'column'])

_TOKEN_SPECIFICATION = [
    ('PROPID', r'[A-Z]+'),
    ('PROPVAL', r'\[\$\d+\]'),
    ('NODE', r';'),
    ('LPARENTHESIS', r'\('),
    ('RPARENTHESIS', r'\)'),
    # Must precede SKIP: "\s" also matches "\n" and would hide line breaks.
    ('NEWLINE', r'\n'),
    ('SKIP', r'\s'),
]
_TOKEN_RE = re.compile(
    '|'.join('(?P<%s>%s)' % pair for pair in _TOKEN_SPECIFICATION))


def tokenize(s):
    """Yield the tokens of the compressed text *s*.

    Whitespace separates tokens and is not reported.

    Args:
        s: Text as returned by ``compressValues``.

    Yields:
        ``Token(typ, value, line, column)`` where ``typ`` is one of
        ``PROPID``, ``PROPVAL``, ``NODE``, ``LPARENTHESIS`` or
        ``RPARENTHESIS``, ``line`` counts from 1 and ``column`` from 0.

    Raises:
        RuntimeError: If a character does not start any known token.
    """
    line = 1
    line_start = 0
    pos = 0
    while pos < len(s):
        mo = _TOKEN_RE.match(s, pos)
        if mo is None:
            raise RuntimeError(
                'Unexpected character %r on line %d' % (s[pos], line))
        typ = mo.lastgroup
        if typ == 'NEWLINE':
            # Columns on the next line are measured from just after "\n".
            line_start = mo.end()
            line += 1
        elif typ != 'SKIP':
            yield Token(typ, mo.group(typ), line, mo.start() - line_start)
        pos = mo.end()


#
# Recursive descent recognizer,
# see http://en.wikipedia.org/wiki/Recursive_descent_parser
#
# Each grammar function returns False, consuming nothing, when its construct
# does not start at the current token, and True when the construct was
# consumed completely.  A construct that starts but cannot be completed
# raises RuntimeError.
#

# Terminal symbols, named by the token types that tokenize() produces.
ident = 'PROPID'
propval = 'PROPVAL'
semicolon = 'NODE'
lparenthesis = 'LPARENTHESIS'
rparenthesis = 'RPARENTHESIS'

# Parser state: the token list, the index of the next unread token and the
# type of the current lookahead token (None once the input is exhausted).
tokens = []
i = 0
sym = None


def loadTokens(newTokens):
    """Reset the parser state to read *newTokens* from the beginning.

    Args:
        newTokens: Iterable of ``Token`` tuples, e.g. ``tokenize(text)``.
    """
    global tokens, i
    tokens = list(newTokens)
    i = 0
    getSym()


def getSym():
    """Advance the lookahead ``sym`` to the type of the next token."""
    global i, sym
    if i < len(tokens):
        sym = tokens[i].typ
        i += 1
    else:
        sym = None


def accept(s):
    """Consume the current token if its type is *s*.

    Returns:
        True if the token was consumed, False otherwise.
    """
    if sym == s:
        getSym()
        return True
    return False


def expect(s):
    """Consume a token of type *s*, which is mandatory at this point.

    Returns:
        True.

    Raises:
        RuntimeError: If the current token has a different type.
    """
    if accept(s):
        return True
    found = sym if sym is not None else 'end of input'
    raise RuntimeError('Expected %s but found %s' % (s, found))


def propValue():
    """Recognize one property value.

    The tokenizer delivers a bracketed placeholder as a single PROPVAL
    token, so there are no separate bracket tokens to match here.
    """
    return accept(propval)


def propIdent():
    """Recognize a property identifier."""
    return accept(ident)


def propertyX():
    """Recognize a property: an identifier followed by one or more values."""
    if not propIdent():
        return False
    expect(propval)
    while propValue():
        pass
    return True


def node():
    """Recognize a node: ';' followed by zero or more properties."""
    if not accept(semicolon):
        return False
    while propertyX():
        pass
    return True


def sequence():
    """Recognize a sequence: one or more nodes."""
    if not node():
        return False
    while node():
        pass
    return True


def gameTree():
    """Recognize a game tree: '(' sequence, zero or more game trees, ')'."""
    if not accept(lparenthesis):
        return False
    if not sequence():
        found = sym if sym is not None else 'end of input'
        raise RuntimeError('Expected %s but found %s' % (semicolon, found))
    while gameTree():
        pass
    expect(rparenthesis)
    return True


def collection():
    """Recognize a whole collection: one or more game trees, then the end.

    Returns:
        True if at least one game tree was recognized, False if the token
        stream is empty.

    Raises:
        RuntimeError: If a game tree is malformed or tokens remain after
            the last game tree.
    """
    count = 0
    while gameTree():
        count += 1
    if sym is not None:
        raise RuntimeError('Unexpected %s after game tree' % sym)
    return count > 0


if __name__ == "__main__":
    main(sys.argv[1] if len(sys.argv) > 1 else DEFAULT_SGF_PATH)
    sys.exit(0)