Files @ ce5c50aae93a
Branch filter:

Location: EGD-mine/queries.py

Laman
lifespans: přidán min_rating
import sys

from egd import connection


cursor=connection.cursor()


def yearStart(year):
	return "{0}-01-01".format(year)


def playerHistory(name,year=0):
	"""Výsledky hráče s daným jménem pro daný rok nebo pro celou historii."""
	query="""
		select name, datum, win_count, game_count, rating_after, rating_after-rating_before
			from players natural join players_performance as pp join tournaments as ts on pp.tournament_code=ts.code
			where name=? and datum>=date(?) and datum<date(?)
			order by datum asc
	"""
	if year:
		date1=yearStart(year)
		date2=yearStart(year+1)
	else:
		date1="1990-01-01"
		date2="2020-01-01"

	res=cursor.execute(query,(name,date1,date2))
	print("jméno\tdatum\tpočet výher\tpočet partií\trating po turnaji\tzměna")
	for rec in res:
		print("\t".join([str(x) for x in rec]))


def _yearByCountry(year):
	"""Počty hráčů s turnajem odehraným v daném roce, dělené podle země původu. Při změně příslušnosti se hráč započítá oběma zemím."""
	date1=yearStart(year)
	date2=yearStart(year+1)
	query="""
		select p_country_code,count(pin) as player_count from (
			select pin,count(*),p_country_code from
				players_performance_tournaments
				where datum>=date(?) and datum<date(?)
				group by pin,p_country_code
			)
			group by p_country_code
			order by p_country_code asc
		"""

	return cursor.execute(query,(date1,date2))


def yearsByCountry(year1,year2):
	res=dict()
	for year in range(year1,year2):
		records=_yearByCountry(year)
		for (countryCode,playerCount) in records:
			if countryCode not in res:
				res[countryCode]=dict()
			res[countryCode][year]=playerCount
		print("\r",year,end="",file=sys.stderr)
	print("\r",end="",file=sys.stderr)

	print("Počty aktivních hráčů podle let")
	print("kód země\t"+"\t".join(str(y) for y in range(year1,year2)))
	for countryCode in sorted(res.keys()):
		print(countryCode,end="\t")
		print("\t".join(str(res[countryCode].get(y,0)) for y in range(year1,year2)))


def timeRank(year,country):
	"""Výpis hráčů dané země aktivních v daném roce.

	Rating platí ke konci roku."""
	date1=yearStart(year)
	date2=yearStart(year+1)
	query="""
		select ppt.pin,date(?)-min_datum-1,min_datum,ppt.rating_after as rating,name
			from
				(select pin,min(datum) as min_datum,max(datum) as max_datum from players_performance_tournaments where datum<date(?) group by pin having max_datum>=date(?)) as subq
				join players_performance_tournaments as ppt on subq.pin=ppt.pin and subq.max_datum=ppt.datum natural join players
			where ppt.p_country_code=?
			order by rating desc
	"""

	res=cursor.execute(query,(date2,date2,date1,country))
	print("pin\tcelé roky hraní\tprvní turnaj\trating")
	for rec in res:
		print("\t".join([str(x) for x in rec]))


def _populationStrength(year,country):
	"""Rozdělení populace země v daném roce podle ratingu (po stovkách). Uvažují se pouze aktivní hráči."""
	date1=yearStart(year)
	date2=yearStart(year+1)
	query="""
		select round((rating_after-50)/100)*100 as rating, count(ppt.pin)
			from players_performance_tournaments as ppt
				join (select pin,max(datum) as max_datum from players_performance_tournaments where datum>=date(?) and datum<date(?) and p_country_code=? group by pin) as subq on ppt.pin=subq.pin and ppt.datum=subq.max_datum
			group by rating
			order by rating desc
	"""

	return cursor.execute(query,(date1,date2,country))


def populationStrength(year1,year2,country):
	res=dict()

	for year in range(year1,year2):
		records=_populationStrength(year,country)
		for (rating,playerCount) in records:
			if rating not in res:
				res[rating]=dict()
			res[rating][year]=playerCount
		print("\r",year,end="",file=sys.stderr)
	print("\r",end="",file=sys.stderr)

	print("Počty aktivních hráčů v {0} s uvedeným a vyšším ratingem".format(country))
	print("rating\t"+"\t".join(str(y) for y in range(year1,year2)))
	for rating in sorted(res.keys()):
		print(rating,end="\t")
		print("\t".join(str(res[rating].get(y,0)) for y in range(year1,year2)))


def _populationAge(year,country):
	"""Rozdělení populace země v daném roce podle doby hraní (celých let od prvního turnaje). Uvažují se pouze aktivní hráči."""
	date1=yearStart(year)
	date2=yearStart(year+1)
	query="""
		select date(?)-min_datum-1 as playing_years,count(pin)
			from (
				select ppt1.pin,min(ppt1.datum) as min_datum
					from players_performance_tournaments as ppt1
						join players_performance_tournaments as ppt2 on ppt1.pin=ppt2.pin
					where ppt2.datum>=date(?) and ppt2.datum<date(?) and ppt2.p_country_code=?
					group by ppt2.pin
			)
			group by playing_years
	"""
	return cursor.execute(query,(date2,date1,date2,country))


def populationAge(year1,year2,country):
	res=dict()

	for year in range(year1,year2):
		records=_populationAge(year,country)
		for (age,playerCount) in records:
			if age not in res:
				res[age]=dict()
			res[age][year]=playerCount
		print("\r",year,end="",file=sys.stderr)
	print("\r",end="",file=sys.stderr)

	print("Počty aktivních hráčů v {0} hrajících tolik a více let".format(country))
	print("roky hraní\t"+"\t".join(str(y) for y in range(year1,year2)))
	for age in sorted(res.keys()):
		print(age,end="\t")
		print("\t".join(str(res[age].get(y,0)) for y in range(year1,year2)))


def _tournamentAttendance(year):
	"""Návštěvnosti turnajů za daný rok, dělené podle země konání."""
	date1=yearStart(year)
	date2=yearStart(year+1)
	query="""select country_code,sum(player_count) from tournaments where datum>=date(?) and datum<date(?) group by country_code"""

	return cursor.execute(query,(date1,date2))


def tournamentAttendance(year1,year2):
	res=dict()

	for year in range(year1,year2):
		records=_tournamentAttendance(year)
		for (country,attendance) in records:
			if country not in res:
				res[country]=dict()
			res[country][year]=attendance
		print("\r",year,end="",file=sys.stderr)
	print("\r",end="",file=sys.stderr)

	print("Návštěvnosti (počty hráčů) na turnajích podle let")
	print("kód země\t"+"\t".join(str(y) for y in range(year1,year2)))
	for country in sorted(res.keys()):
		print(country,end="\t")
		print("\t".join(str(res[country].get(y,0)) for y in range(year1,year2)))


def fetchByRating(strongerThan,country=""):
	"""Hráči, kteří dosáhli rating strongerThan nebo vyšší, nepovinně filtrovaní podle země, seřazení od nejlepšího dosaženého ratingu."""
	query="""
		select pin,name,max(rating_after) as max_rating
			from players_performance natural join players
			where rating_after>=? and (?='' or country_code=?)
			group by pin
			order by max_rating desc"""

	res=cursor.execute(query,(strongerThan,country,country))
	print("pin\tjméno\tmax rating")
	for rec in res:
		print("\t".join([str(x) for x in rec]))


def _tournamentsToPeak():
	"""Vrací slovník {pin: počet_turnajů_k_dosažení_max_ratingu}"""
	query="""select pin,rating_after,max_rating from players_performance_tournaments join (select pin as sub_pin,max(rating_after) as max_rating from players_performance group by pin) on pin=sub_pin order by datum asc"""

	res=cursor.execute(query)
	players=dict()
	closed=set()
	for (pin,rating,maxRating) in res:
		tc=players.get(pin,0)
		if pin not in closed: players[pin]=tc+1
		if rating==maxRating: closed.add(pin)

	return players


def timeToPeak():
	"""Jakého hráči dosáhli maximálního ratingu a jak dlouho jim to trvalo.

	Uvažuje hráče začínající s ratingem<=600."""
	# join on pin, max_rating může duplikovat řádky, když měl hráč stejný max_rating vícekrát.
	# proto ten ošklivý vnější group by :-ú
	query="""
		select subq.pin,min(min_rating),min(max_rating),min(_min_datum) as min_datum,min(datum) as peak_datum,(julianday(min(datum))-julianday(min(_min_datum)))/365.25 as time_to_peak
			from players_performance_tournaments as ppt
				join (select pin,min(rating_before) as min_rating,max(rating_after) as max_rating,min(datum) as _min_datum from players_performance_tournaments group by pin having min_rating<=600) as subq on ppt.pin=subq.pin and ppt.rating_after=max_rating
			group by ppt.pin
	"""

	tournamentCounts=_tournamentsToPeak()
	res=cursor.execute(query)

	print("pin\tmin rating\tmax_rating\tprvní turnaj\tdosažení max ratingu\ttime_to_peak\tturnaje k max ratingu")
	for rec in res:
		pin=rec[0]
		print("\t".join([str(x) for x in rec]+[str(tournamentCounts[pin])]))


def playersProgress():
	"""Všem hráčům vypíše rating po jednotlivých letech. Zobrazuje rating na konci roku, zaokrouhlený dolů na stovky. Nula značí rok bez záznamu v databázi."""
	y1=1996
	y2=2018

	class Player:
		def __init__(self):
			self.rating=dict()
			self.country=""
			self.tournamentCount=0

	query="""
		select pin,p_country_code as country_code,t_count,cast(strftime('%Y',datum) as int) as year,rating_after,datum
			from players_performance_tournaments as ppt
				join (select pin as sub_pin,count(tournament_code) as t_count from players_performance group by pin) on ppt.pin=sub_pin
			order by datum desc"""
	res=cursor.execute(query)

	players=dict()
	for (i,rec) in enumerate(res):
		(pin,country,tournamentCount,year,rating,_)=rec
		if pin in players:
			p=players[pin]
		else:
			p=Player()
			p.country=country
			p.tournamentCount=tournamentCount
			players[pin]=p
		if year not in p.rating: p.rating[year]=round(rating//100*100)
		if i%1000==0: print(i,"/ ?","\r",end="",file=sys.stderr)
	print(" "*20,file=sys.stderr)

	print("pin\tkód země\tpočet turnajů\t"+"\t".join(str(y) for y in range(y1,y2)))
	for (pin,p) in players.items():
		print(pin,p.country,p.tournamentCount,*(p.rating.get(y,0) for y in range(y1,y2)),sep="\t")


def lifeSpans():
	"""Životnosti hráčů. Datum prvního a posledního turnaje, jejich rozdíl v letech, počet odehraných turnajů."""
	query="""select pin,min(datum) as min_datum,max(datum) as max_datum,(julianday(max(datum))-julianday(min(datum)))/365.25 as lifespan,count(datum) as t_count,max(rating_after) as max_rating,min(rating_before) as min_rating
		from players_performance_tournaments group by pin"""

	res=cursor.execute(query)

	print("pin\tmin datum\tmax datum\tmax-min\tpočet turnajů\tmax rating\tmin rating")
	for rec in res:
		print("\t".join([str(x) for x in rec]))


# playerHistory("Kotowski Jaroslav")
# timeRank(2017,"CZ")
# yearsByCountry(2001,2018)
# populationStrength(2001,2018,"RU")
# populationAge(2001,2018,"RU")
# tournamentAttendance(2001,2018)
# fetchByRating(2600,"CZ")
# timeToPeak()
# playersProgress()
lifeSpans()