Skip to content

Commit 52fc3c3

Browse files
author
Flood Sung
authored
Merge pull request floodsung#38 from suicidedamsel/IncrementalAndTimeout
Incremental and timeout
2 parents c615f48 + a04d98c commit 52fc3c3

File tree

1 file changed

+8 -3 lines changed

download.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,15 @@
 
 def download_pdf(link, location, name):
     try:
-        response = urllib2.urlopen(link)
+        response = urllib2.urlopen(link, timeout=500)
         file = open(os.path.join(location, name), 'w')
         file.write(response.read())
         file.close()
     except urllib2.HTTPError:
         print('>>> Error 404: cannot be downloaded!\n')
         raise
+    except socket.timeout:
+        print(" ".join(("can't download", link, "due to connection timeout!")) )
 
 def clean_pdf_link(link):
     if 'arxiv' in link:
@@ -81,7 +83,8 @@ def shorten_title(title):
8183
current_directory = h1_directory
8284
elif point.name == 'h2':
8385
current_directory = os.path.join(h1_directory, clean_text(point.text))
84-
os.makedirs(current_directory)
86+
if not os.path.exists(current_directory):
87+
os.makedirs(current_directory)
8588
print_title(point.text)
8689

8790
if point.name == 'p':
@@ -93,7 +96,9 @@ def shorten_title(title):
9396
print(shorten_title(point.text) + ' (' + link + ')')
9497
try:
9598
name = clean_text(point.text.split('[' + ext + ']')[0])
96-
download_pdf(link, current_directory, '.'.join((name, ext)))
99+
fullname = '.'.join((name, ext))
100+
if not os.path.exists('/'.join((current_directory, fullname)) ):
101+
download_pdf(link, current_directory, '.'.join((name, ext)))
97102
except:
98103
failures.append(point.text)
99104

0 commit comments

Comments
 (0)