Files
@ 7cb01d4080c9
Branch filter:
Location: OneEye/exp/collector.py - annotation
7cb01d4080c9
1.2 KiB
text/x-python
a hinted neural network (failed)
a1e2cc709d53 a1e2cc709d53 a1e2cc709d53 a1e2cc709d53 1d1fd2ae49bf a1e2cc709d53 a1e2cc709d53 a1e2cc709d53 a1e2cc709d53 a1e2cc709d53 1d1fd2ae49bf 1d1fd2ae49bf 1d1fd2ae49bf 1d1fd2ae49bf 1d1fd2ae49bf a1e2cc709d53 1d1fd2ae49bf 1d1fd2ae49bf 1d1fd2ae49bf 1d1fd2ae49bf 1d1fd2ae49bf 1d1fd2ae49bf 1d1fd2ae49bf 1d1fd2ae49bf 1d1fd2ae49bf 1d1fd2ae49bf 1d1fd2ae49bf 1d1fd2ae49bf a1e2cc709d53 a1e2cc709d53 a1e2cc709d53 1d1fd2ae49bf 1d1fd2ae49bf 1d1fd2ae49bf 1d1fd2ae49bf 1d1fd2ae49bf 1d1fd2ae49bf 1d1fd2ae49bf a1e2cc709d53 a1e2cc709d53 a1e2cc709d53 a1e2cc709d53 a1e2cc709d53 a1e2cc709d53 a1e2cc709d53 a1e2cc709d53 1d1fd2ae49bf | import os
import sys
import random
import time
import urllib.error
import urllib.request
from lxml import html
# Image collector script.
#
# Usage: collector.py TARGET_DIR LINKS_FILE
#
# LINKS_FILE holds one page URL per line; blank lines and lines starting
# with "#" are skipped. For every page the script reads the license text and
# the og:image URL, stores the image in TARGET_DIR and prints one
# tab-separated record (link, filename, license) to stdout. Failures are
# reported on stderr and the script moves on to the next link.


def _fetch(url):
    """Return the body of *url* as bytes, or None when the download fails.

    OSError covers urllib.error.URLError / HTTPError as well as socket-level
    failures (timeouts, resets); ValueError covers malformed URLs, which
    urlopen rejects before any network traffic happens.
    """
    try:
        with urllib.request.urlopen(url) as resp:
            if resp.getcode() != 200:
                return None
            return resp.read()
    except (OSError, ValueError):
        return None


targetDir = sys.argv[1]
linksFile = sys.argv[2]

with open(linksFile) as f:
    links = f.readlines()

for link in links:
    link = link.strip()
    # Skip blank lines as well as comments: an empty URL would make urlopen
    # raise and abort the whole run.
    if not link or link.startswith("#"):
        continue

    # Randomized 1-3 s pause between requests.
    time.sleep(1 + 2 * random.random())

    page = _fetch(link)
    if page is None:
        print("error when downloading {0}".format(link), file=sys.stderr)
        continue

    try:
        doc_str = page.decode("utf8")
        doc = html.fromstring(doc_str)
        license_text = doc.xpath(
            "//div[@class='photo-license-info']//span/text()"
        )[0].strip()
        imgUrl = doc.xpath("//meta[@property='og:image']/@content")[0]
    except (UnicodeDecodeError, IndexError):
        # Undecodable page, or one of the expected elements is missing.
        print("error when parsing {0}".format(link), file=sys.stderr)
        continue

    # Last path component of the image URL, without any query string.
    filename = imgUrl.split("/")[-1].split("?")[0]
    if not filename:
        # e.g. an image URL ending in "/": there is nothing to name the file.
        print("error when parsing {0}".format(link), file=sys.stderr)
        continue

    image = _fetch(imgUrl)
    if image is None:
        print("error when downloading {0}".format(link), file=sys.stderr)
        continue

    # File-system errors are deliberately not caught: a missing or read-only
    # target directory would fail for every link, so failing fast is better.
    with open(os.path.join(targetDir, filename), mode="wb") as f:
        f.write(image)

    print("\t".join(map(str, [link, filename, license_text])), flush=True)
|