Merge pull request #1524 from epfl-scitas/bugfix/urls

Strip leading/trailing whitespace (spaces and '\n') from each link before joining it with the response URL.
This commit is contained in:
Todd Gamblin 2016-08-15 13:42:38 -07:00 committed by GitHub
commit ae20e53cfb

View file

@@ -109,7 +109,7 @@ def _spider(args):
while link_parser.links: while link_parser.links:
raw_link = link_parser.links.pop() raw_link = link_parser.links.pop()
abs_link = urlparse.urljoin(response_url, raw_link) abs_link = urlparse.urljoin(response_url, raw_link.strip())
links.add(abs_link) links.add(abs_link)