Changeset - bc762fdfee0f
[Not reviewed]
default
0 3 0
Laman - 7 years ago 2018-03-27 16:57:49

dokumentace, komentáře
3 files changed with 16 insertions and 0 deletions:
0 comments (0 inline, 0 general)
egd.py
Show inline comments
 
import sys
 
import re
 
import datetime
 
import sqlite3
 

	
 

	
 
T_CODE=r"[WEGT]\d{6}[A-Z]*"
 
NAME_CHARS=r"[\w().#_`?°-]"
 

	
 
connection=sqlite3.connect("egd.db")
 

	
 

	
 
def loadTourneys(filename,enc="iso8859_2"):
 
	# code class rounds players (country) description
 
	regexp=re.compile(r"(%s)\s+([A-D])\s+(\d+)\s+(\d+)\s+\(([A-Z]{2})\)\s+(.*)" % T_CODE)
 
	tournaments=dict()
 

	
 
	for line in open(filename,encoding=enc):
 
		line=line.strip()
 
		match=regexp.match(line)
 
		if match is None:
 
			print("ignored:",line,file=sys.stderr)
 
			continue
 
		(code,cls,rc,pc,cc,dsc)=match.groups()
 
		t=dict(zip(
 
			["code","class","roundCount","playerCount","countryCode","description"],
 
			(code,cls,int(rc),int(pc),cc,dsc)
 
		))
 
		d=datetime.datetime.strptime(t["code"][1:7],"%y%m%d")
 
		t["date"]=d.date().isoformat()
 

	
 
		tournaments[t["code"]]=t
 
	return tournaments
 

	
 

	
 
def loadPlayers(filename,enc="iso8859_2"):
 
	# pin name country club rank tournamentCode wins games gor1 gor2
 
	regexp=re.compile(r"(\d+)\s+(%s+(?: %s+)*)\s+([A-Z]{2})\s+([\w?.=\\-]+)\s+(\d+[kdp])\s+(%s)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)" % (NAME_CHARS,NAME_CHARS,T_CODE))
 
	players=[]
 

	
 
	for line in open(filename,encoding=enc):
 
		line=line.strip()
 
		if not line: continue
 

	
 
		match=regexp.match(line)
 
		if match is None:
 
			print("ignored:",line,file=sys.stderr)
 
			continue
 
		(pin,name,cc,cl,rank,tcode,wc,gc,r1,r2)=match.groups()
 
		record=dict(zip(
 
			["pin","name","countryCode","club","rank","tournamentCode","winCount","gameCount","rating1","rating2"],
 
			(int(pin),name,cc,cl,rank,tcode,int(wc),int(gc),int(r1),int(r2))
 
		))
 
		players.append(record)
 
	return players
 

	
 

	
 
def setupDB():
 
	cursor=connection.cursor()
 

	
 
	cursor.execute("""create table tournaments (code text primary key, datum text, class text, round_count integer, player_count integer, country_code text, description text)""")
 
	cursor.execute("""create table players (pin integer primary key, name text)""")
 
	cursor.execute("""create table players_performance (pin integer references players(pin), tournament_code text references tournaments(code), country_code text, club text, win_count integer, game_count integer, rating_before real, rating_after real, primary key (pin,tournament_code))""")
 

	
 
	cursor.execute("""
 
		create view players_performance_tournaments (pin,p_country_code,rating_before,rating_after,t_country_code,datum) as
 
			select pin,pp.country_code,rating_before,rating_after,ts.country_code,datum
 
				from players_performance as pp join tournaments as ts on pp.tournament_code=ts.code
 
	""")
 

	
 
	cursor.execute("""create index pp_country_ind on players_performance (country_code)""")
 
	cursor.execute("""create index t_datum_ind on tournaments (datum)""")
 

	
 
	connection.commit()
 

	
 

	
 
def fillDB(tournaments,players):
 
	cursor=connection.cursor()
 

	
 
	cursor.execute("""delete from players_performance""")
 
	cursor.execute("""delete from players""")
 
	cursor.execute("""delete from tournaments""")
 

	
 
	cursor.executemany(
 
		"""insert into tournaments (code,datum,class,round_count,player_count,country_code,description) values (:code,:date,:class,:roundCount,:playerCount,:countryCode,:description)""",
 
		tournaments.values()
 
	)
 

	
 
	cursor.executemany("""insert or ignore into players (pin,name) values (:pin,:name)""", players)
 

	
 
	cursor.executemany(
 
		"""insert into players_performance (pin,tournament_code,country_code,club,win_count,game_count,rating_before,rating_after) values (:pin,:tournamentCode,:countryCode,:club,:winCount,:gameCount,:rating1,:rating2)""",
 
		players
 
	)
 

	
 
	connection.commit()
queries.py
Show inline comments
 
from egd import connection
 

	
 

	
 
cursor=connection.cursor()
 

	
 

	
 
def yearStart(year):
 
	return "{0}-01-01".format(year)
 

	
 

	
 
def playerYear(name,year):
 
	"""Výsledky hráče s daným jménem pro daný rok."""
 
	query="""
 
		select name, datum, win_count, game_count, rating_after, rating_after-rating_before
 
			from players natural join players_performance as pp join tournaments as ts on pp.tournament_code=ts.code
 
			where name=? and datum>=date(?) and datum<date(?)
 
			order by datum asc
 
	"""
 
	date1=yearStart(year)
 
	date2=yearStart(year+1)
 

	
 
	res=cursor.execute(query,(name,date1,date2))
 
	print("jméno\tdatum\tpočet výher\tpočet partií\trating po turnaji\tzměna")
 
	for rec in res:
 
		print("\t".join([str(x) for x in rec]))
 

	
 

	
 
def yearByCountry(year):
 
	"""Počty hráčů s turnajem odehraným v daném roce, dělené podle země původu."""
 
	date1=yearStart(year)
 
	date2=yearStart(year+1)
 
	query="""
 
		select country_code,count(pin) from (
 
			select pin,count(*),pp.country_code from
 
				players_performance as pp join tournaments as ts on pp.tournament_code=ts.code
 
				where datum>=date(?) and datum<date(?)
 
				group by pin
 
			)
 
			group by country_code
 
			order by country_code asc;
 
		"""
 
	res=cursor.execute(query,(date1,date2))
 
	print("kód země\tpočet aktivních hráčů")
 
	for rec in res:
 
		print("\t".join([str(x) for x in rec]))
 

	
 

	
 
def timeRank(year,country):
 
	"""Výpis hráčů dané země aktivních v daném roce."""
 
	date1=yearStart(year)
 
	date2=yearStart(year+1)
 
	query="""
 
		select ppt.pin,date(?)-min_datum-1,min_datum,ppt.rating_after as rating
 
			from
 
				(select pin,min(datum) as min_datum,max(datum) as max_datum from players_performance_tournaments where datum<date(?) group by pin having max_datum>=date(?)) as subq
 
				join players_performance_tournaments as ppt on subq.pin=ppt.pin and subq.max_datum=ppt.datum
 
			where ppt.p_country_code=?"""
 

	
 
	res=cursor.execute(query,(date2,date2,date1,country))
 
	print("pin\tcelé roky hraní\tprvní turnaj\trating")
 
	for rec in res:
 
		print("\t".join([str(x) for x in rec]))
 

	
 

	
 
def populationStrength(year,country):
 
	"""Rozdělení populace země v daném roce podle ratingu (po stovkách). Uvažují se pouze aktivní hráči."""
 
	date1=yearStart(year)
 
	date2=yearStart(year+1)
 
	query="""
 
		select round((rating_after-50)/100)*100 as rating, count(ppt.pin)
 
			from players_performance_tournaments as ppt
 
				join (select pin,max(datum) as max_datum from players_performance_tournaments where datum>=date(?) and datum<date(?) and p_country_code=? group by pin) as subq on ppt.pin=subq.pin and ppt.datum=subq.max_datum
 
			group by rating
 
			order by rating desc
 
	"""
 

	
 
	res=cursor.execute(query,(date1,date2,country))
 
	print("rating\tpočet hráčů s takovým a vyšším ratingem")
 
	for rec in res:
 
		print("\t".join([str(x) for x in rec]))
 

	
 

	
 
def populationAge(year,country):
 
	"""Rozdělení populace země v daném roce podle doby hraní (celých let od prvního turnaje). Uvažují se pouze aktivní hráči."""
 
	date1=yearStart(year)
 
	date2=yearStart(year+1)
 
	query="""
 
		select date(?)-min_datum-1 as playing_years,count(pin)
 
			from (
 
				select ppt1.pin,min(ppt1.datum) as min_datum
 
					from players_performance_tournaments as ppt1
 
						join players_performance_tournaments as ppt2 on ppt1.pin=ppt2.pin
 
					where ppt2.datum>=date(?) and ppt2.datum<date(?) and ppt1.p_country_code=?
 
					group by ppt1.pin
 
			)
 
			group by playing_years"""
 

	
 
	res=cursor.execute(query,(date2,date1,date2,country))
 
	print("celé roky hraní\tpočet hráčů")
 
	for rec in res:
 
		print("\t".join([str(x) for x in rec]))
 

	
 

	
 
def tournamentAttendance(year):
 
	"""Návštěvnosti turnajů za daný rok, dělené podle země konání."""
 
	date1=yearStart(year)
 
	date2=yearStart(year+1)
 
	query="""select country_code,sum(player_count) from tournaments where datum>=date(?) and datum<date(?) group by country_code"""
 

	
 
	res=cursor.execute(query,(date1,date2))
 
	print("kód země\tpočet hráčů na turnajích")
 
	for rec in res:
 
		print("\t".join([str(x) for x in rec]))
 

	
 

	
 
# playerYear("Kotowski Jaroslav",2017)
 
# yearByCountry(2017)
 
# timeRank(2017,"CZ")
 
# populationStrength(2017,"CZ")
 
# populationAge(2017,"CZ")
 
# tournamentAttendance(2010)
readme.md
Show inline comments
 
# Zpracování a dotazování dat z EGD
 

	
 
## egd.py
 
Funkce pro načtení dat vyexportovaných z veřejného rozhraní EGD a jejich konverzi do SQLite databáze.
 

	
 
## setup.py
 
Načte soubory tlist.html a all.hst (z aktivního adresáře) a vytvoří soubor egd.db s databází.
 

	
 
## queries.py
 
Uložené dotazy.
 

	
 
## Schéma databáze
 
### tournaments
 
* (text) code
 
* (text) datum
 
* (text) class
 
* (integer) round_count
 
* (integer) player_count
 
* (text) country_code
 
* (text) description
 

	
 
### players
 
* (integer) pin
 
* (text) name
 

	
 
### players_performance
 
Co se vztahuje zároveň k hráči a k turnaji. Většina položek all.hst.
 
* (integer) pin
 
* (text) tournament_code
 
* (text) country_code
 
* (text) club
 
* (integer) win_count
 
* (integer) game_count
 
* (real) rating_before
 
* (real) rating_after
 

	
 
### players_performance_tournaments
 
Pohled na tournaments a players_performance.
 
* (integer) pin
 
* (text) p_country_code - země hráče
 
* (real) rating_before
 
* (real) rating_after
 
* (text) t_country_code - lokace turnaje
 
* (text) datum
0 comments (0 inline, 0 general)