Commit 2706ad7

Create numericalmethod.py
Let's Begin
1 parent 1e69226 commit 2706ad7

File tree: 1 file changed (+76, −0)


numericalmethod.py

Lines changed: 76 additions & 0 deletions
@@ -0,0 +1,76 @@
from bs4 import BeautifulSoup   # imported but not used below; links are parsed by hand
import urllib2
import wget


print "Welcome to mathforcollege.com Numerical Method Chapters Downloader \n"


# The million dollar base url
url = "http://nm.mathforcollege.com/topics/index.html"


# Variables
filter_stuff = []  # used in make_things_alright as a temporary list of useful links
chapters = []      # will hold the useful chapter links from the page, discarding other links
pdf_links = []     # declared but not used below


def get_page(url):
    # Fetch a page and return its HTML as a string.
    f = urllib2.urlopen(url)
    page = f.read()
    f.close()
    return page


def get_next_target(page):
    # Find the next '<a href="...">' in the page; return the URL and the position
    # of its closing quote, or (None, 0) when there are no links left.
    start_link = page.find('<a href="')
    if start_link == -1:
        return None, 0
    start_quote = page.find('"', start_link)
    end_quote = page.find('"', start_quote + 1)
    url = page[start_quote + 1:end_quote]
    return url, end_quote


def get_all_links(page):
    # Collect every href that get_next_target can find.
    links = []
    while True:
        url, endpos = get_next_target(page)
        if url:
            links.append(url)
            page = page[endpos:]
        else:
            break
    return links


def find_pdf_links(y):
    # Download every chapter PDF, skipping slides, problem sets, quizzes and examples.
    for plink in y:
        if ('.pdf' in plink and 'http://mathforcollege.com/nm/' in plink
                and 'ppt' not in plink and 'problem' not in plink
                and 'quiz' not in plink and 'example' not in plink):
            # collect_links.append(plink)
            filename = wget.download(plink)


def make_things_alright(y):
    # Keep only relative links and turn them into absolute chapter URLs.
    # iterators
    i = 0
    for text in y:
        if 'http:' not in text and 'https:' not in text and 'mailto' not in text:
            filter_stuff.append(text)
            filter_stuff[i] = 'http://nm.mathforcollege.com/topics/' + text
            i = i + 1
    return filter_stuff


all_links = get_all_links(get_page(url))
chapters = make_things_alright(all_links)  # filtering links


# iterators
i = 0

for text in chapters:
    i = i + 1
    print "Downloading Chapter " + str(i)  # str() needed: concatenating an int to a string raises TypeError
    find_pdf_links(get_all_links(get_page(text)))

print "Download Complete!"
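Side note, not part of the commit: BeautifulSoup is imported above but never used; the links are actually extracted by the hand-rolled string scanning in get_next_target / get_all_links. A minimal sketch of the same extraction done with BeautifulSoup instead, purely as an illustration (the name get_all_links_bs is made up here):

def get_all_links_bs(page):
    # Parse the HTML and return every href found on an <a> tag.
    soup = BeautifulSoup(page, "html.parser")
    return [a.get("href") for a in soup.find_all("a") if a.get("href")]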

0 commit comments
