OneEye Files · exp/collector.py

Files @ 5617054647db

Branch filter:

Location: OneEye/exp/collector.py - annotation

5617054647db 1.2 KiB text/x-python Show Source Show as Raw Download as Raw

Laman

grid construction and evaluation

a1e2cc709d53
a1e2cc709d53
a1e2cc709d53
a1e2cc709d53
1d1fd2ae49bf
a1e2cc709d53
a1e2cc709d53
a1e2cc709d53
a1e2cc709d53
a1e2cc709d53
1d1fd2ae49bf
1d1fd2ae49bf
1d1fd2ae49bf
1d1fd2ae49bf
1d1fd2ae49bf
a1e2cc709d53
1d1fd2ae49bf
1d1fd2ae49bf
1d1fd2ae49bf
1d1fd2ae49bf
1d1fd2ae49bf
1d1fd2ae49bf
1d1fd2ae49bf
1d1fd2ae49bf
1d1fd2ae49bf
1d1fd2ae49bf
1d1fd2ae49bf
1d1fd2ae49bf
a1e2cc709d53
a1e2cc709d53
a1e2cc709d53
1d1fd2ae49bf
1d1fd2ae49bf
1d1fd2ae49bf
1d1fd2ae49bf
1d1fd2ae49bf
1d1fd2ae49bf
1d1fd2ae49bf
a1e2cc709d53
a1e2cc709d53
a1e2cc709d53
a1e2cc709d53
a1e2cc709d53
a1e2cc709d53
a1e2cc709d53
a1e2cc709d53
1d1fd2ae49bf

import os
import sys
import random
import time
import urllib.error
import urllib.request

from lxml import html


# Command line: first argument is the directory images are saved to,
# second argument is a text file whose lines are read into `links`.
targetDir,linksFile=sys.argv[1],sys.argv[2]
with open(linksFile) as linksHandle:
	links=list(linksHandle)

# For every page URL in `links`: fetch the page, read the license text and the
# og:image URL out of it, save the image into `targetDir`, and print one
# tab-separated record (link, filename, license) to stdout.
# Failures on a single link are reported on stderr and the loop moves on.
for link in links:
	link=link.strip()
	# blank lines would make urlopen raise ValueError; lines starting with "#" are comments
	if not link or link.startswith("#"): continue
	# random pause of 1-3 seconds before each request (presumably to go easy on the server)
	time.sleep(1+2*random.random())
	try:
		with urllib.request.urlopen(link) as req:
			if req.getcode()!=200:
				print("error when downloading {0}".format(link),file=sys.stderr)
				continue
			doc_str=req.read().decode("utf8")
	# URLError covers HTTPError (its subclass) and connection failures;
	# ValueError covers malformed URLs and bodies that are not valid UTF-8
	except (urllib.error.URLError,ValueError):
		print("error when downloading {0}".format(link),file=sys.stderr)
		continue

	doc=html.fromstring(doc_str)

	try:
		license=doc.xpath("//div[@class='photo-license-info']//span/text()")[0].strip()
		imgUrl=doc.xpath("//meta[@property='og:image']/@content")[0]
	except IndexError:
		print("error when parsing {0}".format(link),file=sys.stderr)
		continue
	# last path segment of the image URL, with any query string removed
	filename=imgUrl.split("/")[-1].split("?")[0]
	if not filename:
		# an empty name would make open() below target the directory itself
		print("error when parsing {0}".format(link),file=sys.stderr)
		continue

	try:
		with urllib.request.urlopen(imgUrl) as req:
			if req.getcode()!=200:
				print("error when downloading {0}".format(link),file=sys.stderr)
				continue
			imgData=req.read()
	except (urllib.error.URLError,ValueError):
		print("error when downloading {0}".format(link),file=sys.stderr)
		continue

	# write only after the whole body has been read, so a failed download
	# does not leave an empty or truncated file behind
	with open(os.path.join(targetDir,filename),mode="wb") as f:
		f.write(imgData)

	print("\t".join(map(str, [link,filename,license])), flush=True)