File tree Expand file tree Collapse file tree 1 file changed +70
-0
lines changed Expand file tree Collapse file tree 1 file changed +70
-0
lines changed Original file line number Diff line number Diff line change
1
+ from bs4 import BeautifulSoup
2
+ import urllib2
3
+ import wget
4
+ import dryscrape
5
+
6
+
7
# Greeting banner shown when the script starts.
print "Welcome to Xampaperz Downloader \n "


# The million dollar base url -- the index page whose anchors are scraped below.
url = "http://xampaperz.com/cse.php"


# Variables
# NOTE(review): the comment below references make_things_alright, which is not
# defined anywhere in this file -- confirm it exists elsewhere or is stale.
filter_stuff = []  # used in function make_things_alright as a temporary array which stores useful links
chapters = []  # This array will store all the useful links from webpage discarding other links
pdf_links = []  # NOTE(review): never populated in the code visible here -- possibly stale
20
+
21
def get_page(url):
    """Fetch *url* and return the raw response body as a string.

    Fix: the original called f.close() only on the success path, leaking
    the connection if f.read() raised; try/finally closes it always.
    """
    f = urllib2.urlopen(url)
    try:
        return f.read()
    finally:
        f.close()
26
+
27
def get_next_target(page):
    """Locate the first anchor href in *page*.

    Returns (url, index_of_closing_quote), or (None, 0) when *page*
    contains no '<a href="' marker.
    """
    anchor_pos = page.find('<a href="')
    if anchor_pos == -1:
        return None, 0
    # The opening quote is the last character of the marker just matched.
    open_quote = page.find('"', anchor_pos)
    close_quote = page.find('"', open_quote + 1)
    return page[open_quote + 1:close_quote], close_quote
35
+
36
def get_all_links(page):
    """Collect every href found in *page*, in document order.

    Scanning stops at the first empty/absent match, mirroring
    get_next_target's (None, 0) sentinel.
    """
    collected = []
    target, endpos = get_next_target(page)
    while target:
        collected.append(target)
        page = page[endpos:]
        target, endpos = get_next_target(page)
    return collected
46
+
47
def find_viewjpg_links(y):
    """Download every URL in the iterable *y* via wget.

    NOTE(review): despite the name, this does no link *finding* -- it
    simply downloads each entry of *y* to the current directory.

    Fix: the original assigned each downloaded filename to a local and
    discarded it; the filenames are now collected and returned (the
    original returned None, so callers that ignore the return are
    unaffected).

    Returns the list of local filenames written, in input order.
    """
    downloaded = []
    for plink in y:
        downloaded.append(wget.download(plink))
    return downloaded
53
+
54
+
55
+
56
+ all_links = get_all_links (get_page (url ))
57
+ new_links = []
58
+ new_links2 = []
59
+
60
+ for x in all_links :
61
+
62
+ if "1stsem" in x or "2ndsem" in x or "3rdsem" in x or "thsem" in x :
63
+ x = "http://xampaperz.com/" + x
64
+ new_links .append (x )
65
+ session = dryscrape .Session ()
66
+ session .visit (x )
67
+ response = session .body ()
68
+ print response
69
+ soup = BeautifulSoup (response )
70
+ print soup .get_text ()
You can’t perform that action at this time.
0 commit comments