Files
@ 02615be35737
Branch filter:
Location: EGD-mine/queries.py
02615be35737
10.2 KiB
text/x-python
dotaz lifeSpans
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 | import sys
from egd import connection
cursor=connection.cursor()
def yearStart(year):
return "{0}-01-01".format(year)
def playerHistory(name,year=0):
"""Výsledky hráče s daným jménem pro daný rok nebo pro celou historii."""
query="""
select name, datum, win_count, game_count, rating_after, rating_after-rating_before
from players natural join players_performance as pp join tournaments as ts on pp.tournament_code=ts.code
where name=? and datum>=date(?) and datum<date(?)
order by datum asc
"""
if year:
date1=yearStart(year)
date2=yearStart(year+1)
else:
date1="1990-01-01"
date2="2020-01-01"
res=cursor.execute(query,(name,date1,date2))
print("jméno\tdatum\tpočet výher\tpočet partií\trating po turnaji\tzměna")
for rec in res:
print("\t".join([str(x) for x in rec]))
def _yearByCountry(year):
"""Počty hráčů s turnajem odehraným v daném roce, dělené podle země původu. Při změně příslušnosti se hráč započítá oběma zemím."""
date1=yearStart(year)
date2=yearStart(year+1)
query="""
select p_country_code,count(pin) as player_count from (
select pin,count(*),p_country_code from
players_performance_tournaments
where datum>=date(?) and datum<date(?)
group by pin,p_country_code
)
group by p_country_code
order by p_country_code asc
"""
return cursor.execute(query,(date1,date2))
def yearsByCountry(year1,year2):
res=dict()
for year in range(year1,year2):
records=_yearByCountry(year)
for (countryCode,playerCount) in records:
if countryCode not in res:
res[countryCode]=dict()
res[countryCode][year]=playerCount
print("\r",year,end="",file=sys.stderr)
print("\r",end="",file=sys.stderr)
print("Počty aktivních hráčů podle let")
print("kód země\t"+"\t".join(str(y) for y in range(year1,year2)))
for countryCode in sorted(res.keys()):
print(countryCode,end="\t")
print("\t".join(str(res[countryCode].get(y,0)) for y in range(year1,year2)))
def timeRank(year,country):
"""Výpis hráčů dané země aktivních v daném roce.
Rating platí ke konci roku."""
date1=yearStart(year)
date2=yearStart(year+1)
query="""
select ppt.pin,date(?)-min_datum-1,min_datum,ppt.rating_after as rating,name
from
(select pin,min(datum) as min_datum,max(datum) as max_datum from players_performance_tournaments where datum<date(?) group by pin having max_datum>=date(?)) as subq
join players_performance_tournaments as ppt on subq.pin=ppt.pin and subq.max_datum=ppt.datum natural join players
where ppt.p_country_code=?
order by rating desc
"""
res=cursor.execute(query,(date2,date2,date1,country))
print("pin\tcelé roky hraní\tprvní turnaj\trating")
for rec in res:
print("\t".join([str(x) for x in rec]))
def _populationStrength(year,country):
"""Rozdělení populace země v daném roce podle ratingu (po stovkách). Uvažují se pouze aktivní hráči."""
date1=yearStart(year)
date2=yearStart(year+1)
query="""
select round((rating_after-50)/100)*100 as rating, count(ppt.pin)
from players_performance_tournaments as ppt
join (select pin,max(datum) as max_datum from players_performance_tournaments where datum>=date(?) and datum<date(?) and p_country_code=? group by pin) as subq on ppt.pin=subq.pin and ppt.datum=subq.max_datum
group by rating
order by rating desc
"""
return cursor.execute(query,(date1,date2,country))
def populationStrength(year1,year2,country):
res=dict()
for year in range(year1,year2):
records=_populationStrength(year,country)
for (rating,playerCount) in records:
if rating not in res:
res[rating]=dict()
res[rating][year]=playerCount
print("\r",year,end="",file=sys.stderr)
print("\r",end="",file=sys.stderr)
print("Počty aktivních hráčů v {0} s uvedeným a vyšším ratingem".format(country))
print("rating\t"+"\t".join(str(y) for y in range(year1,year2)))
for rating in sorted(res.keys()):
print(rating,end="\t")
print("\t".join(str(res[rating].get(y,0)) for y in range(year1,year2)))
def _populationAge(year,country):
"""Rozdělení populace země v daném roce podle doby hraní (celých let od prvního turnaje). Uvažují se pouze aktivní hráči."""
date1=yearStart(year)
date2=yearStart(year+1)
query="""
select date(?)-min_datum-1 as playing_years,count(pin)
from (
select ppt1.pin,min(ppt1.datum) as min_datum
from players_performance_tournaments as ppt1
join players_performance_tournaments as ppt2 on ppt1.pin=ppt2.pin
where ppt2.datum>=date(?) and ppt2.datum<date(?) and ppt2.p_country_code=?
group by ppt2.pin
)
group by playing_years
"""
return cursor.execute(query,(date2,date1,date2,country))
def populationAge(year1,year2,country):
res=dict()
for year in range(year1,year2):
records=_populationAge(year,country)
for (age,playerCount) in records:
if age not in res:
res[age]=dict()
res[age][year]=playerCount
print("\r",year,end="",file=sys.stderr)
print("\r",end="",file=sys.stderr)
print("Počty aktivních hráčů v {0} hrajících tolik a více let".format(country))
print("roky hraní\t"+"\t".join(str(y) for y in range(year1,year2)))
for age in sorted(res.keys()):
print(age,end="\t")
print("\t".join(str(res[age].get(y,0)) for y in range(year1,year2)))
def _tournamentAttendance(year):
"""Návštěvnosti turnajů za daný rok, dělené podle země konání."""
date1=yearStart(year)
date2=yearStart(year+1)
query="""select country_code,sum(player_count) from tournaments where datum>=date(?) and datum<date(?) group by country_code"""
return cursor.execute(query,(date1,date2))
def tournamentAttendance(year1,year2):
res=dict()
for year in range(year1,year2):
records=_tournamentAttendance(year)
for (country,attendance) in records:
if country not in res:
res[country]=dict()
res[country][year]=attendance
print("\r",year,end="",file=sys.stderr)
print("\r",end="",file=sys.stderr)
print("Návštěvnosti (počty hráčů) na turnajích podle let")
print("kód země\t"+"\t".join(str(y) for y in range(year1,year2)))
for country in sorted(res.keys()):
print(country,end="\t")
print("\t".join(str(res[country].get(y,0)) for y in range(year1,year2)))
def fetchByRating(strongerThan,country=""):
"""Hráči, kteří dosáhli rating strongerThan nebo vyšší, nepovinně filtrovaní podle země, seřazení od nejlepšího dosaženého ratingu."""
query="""
select pin,name,max(rating_after) as max_rating
from players_performance natural join players
where rating_after>=? and (?='' or country_code=?)
group by pin
order by max_rating desc"""
res=cursor.execute(query,(strongerThan,country,country))
print("pin\tjméno\tmax rating")
for rec in res:
print("\t".join([str(x) for x in rec]))
def _tournamentsToPeak():
"""Vrací slovník {pin: počet_turnajů_k_dosažení_max_ratingu}"""
query="""select pin,rating_after,max_rating from players_performance_tournaments join (select pin as sub_pin,max(rating_after) as max_rating from players_performance group by pin) on pin=sub_pin order by datum asc"""
res=cursor.execute(query)
players=dict()
closed=set()
for (pin,rating,maxRating) in res:
tc=players.get(pin,0)
if pin not in closed: players[pin]=tc+1
if rating==maxRating: closed.add(pin)
return players
def timeToPeak():
"""Jakého hráči dosáhli maximálního ratingu a jak dlouho jim to trvalo.
Uvažuje hráče začínající s ratingem<=600."""
# join on pin, max_rating může duplikovat řádky, když měl hráč stejný max_rating vícekrát.
# proto ten ošklivý vnější group by :-ú
query="""
select subq.pin,min(min_rating),min(max_rating),min(_min_datum) as min_datum,min(datum) as peak_datum,(julianday(min(datum))-julianday(min(_min_datum)))/365.25 as time_to_peak
from players_performance_tournaments as ppt
join (select pin,min(rating_before) as min_rating,max(rating_after) as max_rating,min(datum) as _min_datum from players_performance_tournaments group by pin having min_rating<=600) as subq on ppt.pin=subq.pin and ppt.rating_after=max_rating
group by ppt.pin
"""
tournamentCounts=_tournamentsToPeak()
res=cursor.execute(query)
print("pin\tmin rating\tmax rating\tprvní turnaj\tdosažení max ratingu\troky k max ratingu\tturnaje k max ratingu")
for rec in res:
pin=rec[0]
print("\t".join([str(x) for x in rec]+[str(tournamentCounts[pin])]))
def playersProgress():
"""Všem hráčům vypíše rating po jednotlivých letech. Zobrazuje rating na konci roku, zaokrouhlený dolů na stovky. Nula značí rok bez záznamu v databázi."""
y1=1996
y2=2018
class Player:
def __init__(self):
self.rating=dict()
self.country=""
self.tournamentCount=0
query="""
select pin,p_country_code as country_code,t_count,cast(strftime('%Y',datum) as int) as year,rating_after,datum
from players_performance_tournaments as ppt
join (select pin as sub_pin,count(tournament_code) as t_count from players_performance group by pin) on ppt.pin=sub_pin
order by datum desc"""
res=cursor.execute(query)
players=dict()
for (i,rec) in enumerate(res):
(pin,country,tournamentCount,year,rating,_)=rec
if pin in players:
p=players[pin]
else:
p=Player()
p.country=country
p.tournamentCount=tournamentCount
players[pin]=p
if year not in p.rating: p.rating[year]=round(rating//100*100)
if i%1000==0: print(i,"/ ?","\r",end="",file=sys.stderr)
print(" "*20,file=sys.stderr)
print("pin\tkód země\tpočet turnajů\t"+"\t".join(str(y) for y in range(y1,y2)))
for (pin,p) in players.items():
print(pin,p.country,p.tournamentCount,*(p.rating.get(y,0) for y in range(y1,y2)),sep="\t")
def lifeSpans():
"""Životnosti hráčů. Datum prvního a posledního turnaje, jejich rozdíl v letech, počet odehraných turnajů."""
query="""select pin,min(datum) as min_datum,max(datum) as max_datum,(julianday(max(datum))-julianday(min(datum)))/365.25 as lifespan,count(datum) as t_count
from players_performance_tournaments group by pin"""
res=cursor.execute(query)
print("pin\tmin datum\tmax datum\tmax-min\tpočet turnajů")
for rec in res:
print("\t".join([str(x) for x in rec]))
# playerHistory("Kotowski Jaroslav")
# timeRank(2017,"CZ")
# yearsByCountry(2001,2018)
# populationStrength(2001,2018,"RU")
# populationAge(2001,2018,"RU")
# tournamentAttendance(2001,2018)
# fetchByRating(2600,"CZ")
# timeToPeak()
# playersProgress()
# lifeSpans()
|