import os
import sys
import random
import time
import urllib.request
from lxml import html
# Directory the downloaded images are written to; "" means the current
# working directory (os.path.join("", name) == name).
targetDir = ""

# XPath queries run against each photo page.
_LICENSE_XPATH = "//div[@class='photo-license-info']//span/text()"
_IMAGE_XPATH = "//meta[@property='og:image']/@content"


def _report_failure(link, reason=None):
    """Print a download failure for *link* on stderr, with *reason* if known."""
    message = "error when downloading {0}".format(link)
    if reason is not None:
        message = "{0}: {1}".format(message, reason)
    print(message, file=sys.stderr)


def _fetch(url):
    """Return the response body of *url* as bytes, or None on a non-200 status.

    Raises:
        OSError: on network failures (``urllib.error.URLError`` and
            ``HTTPError`` are subclasses of ``OSError``).
        ValueError: when *url* is not a valid URL.
    """
    with urllib.request.urlopen(url) as response:
        if response.getcode() != 200:
            return None
        return response.read()


def _download_photo(link):
    """Save the image advertised by the page at *link* into ``targetDir``.

    The page's ``og:image`` meta tag gives the image URL; the last path
    segment of that URL is used as the local file name.

    Returns:
        ``(filename, license_text)`` on success, or ``None`` after the
        problem has been reported on stderr.
    """
    try:
        page = _fetch(link)
        # UnicodeDecodeError is a ValueError, so a badly encoded page is
        # reported like any other failed download.
        doc_str = None if page is None else page.decode("utf8")
    except (OSError, ValueError) as exc:
        _report_failure(link, exc)
        return None
    if doc_str is None:
        _report_failure(link)
        return None

    doc = html.fromstring(doc_str)
    license_matches = doc.xpath(_LICENSE_XPATH)
    image_matches = doc.xpath(_IMAGE_XPATH)
    if not license_matches or not image_matches:
        _report_failure(link, "license info or og:image not found in page")
        return None
    license_text = license_matches[0].strip()
    img_url = image_matches[0]
    filename = img_url.split("/")[-1]

    try:
        image = _fetch(img_url)
    except (OSError, ValueError) as exc:
        _report_failure(link, exc)
        return None
    if image is None:
        _report_failure(link)
        return None

    # The body is fully read before the file is opened, so a failed download
    # never leaves an empty or truncated file behind.
    try:
        with open(os.path.join(targetDir, filename), mode="wb") as f:
            f.write(image)
    except OSError as exc:
        _report_failure(link, exc)
        return None
    return filename, license_text


def main(argv=None):
    """Download the photo behind every page URL given on the command line.

    For each successfully processed link one tab-separated line
    ``link<TAB>filename<TAB>license`` is printed on stdout; failures are
    reported on stderr and do not stop the remaining links.

    Args:
        argv: Full argument vector including the program name at index 0.
            Defaults to ``sys.argv``.

    Returns:
        The number of links that could not be processed.
    """
    if argv is None:
        argv = sys.argv
    failures = 0
    # argv[0] is the script path, not a link, so it is skipped.
    for link in argv[1:]:
        # Randomised pause of 1-2 seconds before every request.
        time.sleep(1 + random.random())
        result = _download_photo(link)
        if result is None:
            failures += 1
            continue
        filename, license_text = result
        print("\t".join((link, filename, license_text)))
    return failures


if __name__ == "__main__":
    main()