Merge pull request #1524 from epfl-scitas/bugfix/urls
Strip leading and trailing whitespace (spaces and '\n') from links before joining them with the response URL
This commit is contained in:
commit
ae20e53cfb
1 changed file with 1 addition and 1 deletion
|
@@ -109,7 +109,7 @@ def _spider(args):
|
||||||
|
|
||||||
while link_parser.links:
|
while link_parser.links:
|
||||||
raw_link = link_parser.links.pop()
|
raw_link = link_parser.links.pop()
|
||||||
abs_link = urlparse.urljoin(response_url, raw_link)
|
abs_link = urlparse.urljoin(response_url, raw_link.strip())
|
||||||
|
|
||||||
links.add(abs_link)
|
links.add(abs_link)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue