Commit d918d2a

qpdwnldr
1 parent 33fda72 commit d918d2a

File tree

1 file changed (+70, −0 lines)


ques_paper/xam.py

Lines changed: 70 additions & 0 deletions
@@ -0,0 +1,70 @@
from bs4 import BeautifulSoup
import urllib2
import wget
import dryscrape


print "Welcome to Xampaperz Downloader \n"


# The base URL everything else is scraped from
url = "http://xampaperz.com/cse.php"


# Variables
filter_stuff = []  # temporary array of useful links (meant for a helper, make_things_alright, not defined in this file)
chapters = []      # will store the useful links from the webpage, discarding the rest
pdf_links = []


def get_page(url):
    # Fetch the raw HTML of a page (Python 2 urllib2).
    f = urllib2.urlopen(url)
    page = f.read()
    f.close()
    return page


def get_next_target(page):
    # Find the next '<a href="...">' in the HTML; return (url, end position).
    start_link = page.find('<a href="')
    if start_link == -1:
        return None, 0
    start_quote = page.find('"', start_link)
    end_quote = page.find('"', start_quote + 1)
    url = page[start_quote + 1:end_quote]
    return url, end_quote


def get_all_links(page):
    # Collect every href on the page by scanning with get_next_target().
    links = []
    while True:
        url, endpos = get_next_target(page)
        if url:
            links.append(url)
            page = page[endpos:]
        else:
            break
    return links


def find_viewjpg_links(y):
    # Download each link with wget. Defined here but never called below.
    for plink in y:
        filename = wget.download(plink)


all_links = get_all_links(get_page(url))
new_links = []
new_links2 = []

for x in all_links:
    # Keep only semester pages ("1stsem", "2ndsem", "3rdsem", "4thsem", ...).
    if "1stsem" in x or "2ndsem" in x or "3rdsem" in x or "thsem" in x:
        x = "http://xampaperz.com/" + x
        new_links.append(x)
        # Render the page with dryscrape so JavaScript-generated content is present.
        session = dryscrape.Session()
        session.visit(x)
        response = session.body()
        print response
        soup = BeautifulSoup(response, "html.parser")  # explicit parser avoids a bs4 warning
        print soup.get_text()
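
Note that the script above is Python 2 only (print statements, urllib2), and dryscrape has no Python 3 release. For reference, here is a minimal Python 3 sketch of the same fetch-and-filter flow, assuming the same xampaperz.com URL layout and skipping the JavaScript-rendering step; BASE, START, and SEM_MARKERS are illustrative names introduced here, not part of the commit:

from urllib.request import urlopen
from bs4 import BeautifulSoup

BASE = "http://xampaperz.com/"   # assumption: same base as the script above
START = BASE + "cse.php"

def get_all_links(url):
    # Return every href found on the page at `url`.
    html = urlopen(url).read()
    soup = BeautifulSoup(html, "html.parser")
    return [a["href"] for a in soup.find_all("a", href=True)]

# Same semester filter as the commit's chain of "in" checks.
SEM_MARKERS = ("1stsem", "2ndsem", "3rdsem", "thsem")

for href in get_all_links(START):
    if any(marker in href for marker in SEM_MARKERS):
        page_url = BASE + href
        print(page_url)  # semester page; a downloader would fetch files from here

Letting BeautifulSoup extract the anchors replaces the hand-rolled get_next_target()/get_all_links() string scanning, which is the main simplification over the original.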
