|
| 1 | +from __future__ import print_function |
1 | 2 | import os
|
2 | 3 | import re
|
3 |
| -import urllib2 |
| 4 | +from six.moves.urllib.request import urlopen |
| 5 | +from six.moves.urllib.error import HTTPError |
4 | 6 | import shutil
|
5 | 7 | import argparse
|
6 | 8 | import mistune
|
7 | 9 | import bs4 as BeautifulSoup
|
| 10 | +import socket |
| 11 | +import time |
8 | 12 |
|
def download_pdf(link, location, name):
    """Download the PDF at *link* and save it as *name* inside *location*.

    Parameters
    ----------
    link : str
        URL of the PDF to fetch.
    location : str
        Directory the file is written into (must already exist).
    name : str
        Filename (including extension) for the saved PDF.

    Raises
    ------
    HTTPError
        Re-raised after logging when the server rejects the request.
    socket.timeout
        Re-raised after logging when the connection stalls.
    """
    try:
        response = urlopen(link, timeout=500)
        # PDFs are binary and response.read() returns bytes under Python 3,
        # so the file must be opened in 'wb' mode — text mode 'w' would raise
        # a TypeError on bytes and could corrupt the payload via newline
        # translation. The context manager guarantees the handle is closed
        # even if the write fails (the original leaked it on that path).
        with open(os.path.join(location, name), 'wb') as out:
            out.write(response.read())
    except HTTPError:
        print('>>> Error 404: cannot be downloaded!\n')
        raise
    except socket.timeout:
        print(" ".join(("can't download", link, "due to connection timeout!")) )
        raise
20 | 25 |
|
21 | 26 | def clean_pdf_link(link):
|
22 | 27 | if 'arxiv' in link:
|
@@ -97,6 +102,13 @@ def shorten_title(title):
|
97 | 102 | fullname = '.'.join((name, ext))
|
98 | 103 | if not os.path.exists('/'.join((current_directory, fullname)) ):
|
99 | 104 | download_pdf(link, current_directory, '.'.join((name, ext)))
|
| 105 | + except KeyboardInterrupt: |
| 106 | + try: |
| 107 | + print("Press Ctrl-C in 1 second to quit") |
| 108 | + time.sleep(1) |
| 109 | + except KeyboardInterrupt: |
| 110 | + print("Cancelling..") |
| 111 | + break |
100 | 112 | except:
|
101 | 113 | failures.append(point.text)
|
102 | 114 |
|
|
0 commit comments