Skip to content

Commit 8cebae8

Browse files
committed
working
1 parent e8c71e4 commit 8cebae8

File tree

2 files changed

+210
-14
lines changed

2 files changed

+210
-14
lines changed

ques_paper/gh.py

Lines changed: 32 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,10 @@ def makeup(x,y):
6161

6262
links=[]
6363

64-
f=open("adsa.txt","w")
64+
f=open("forextrastuff.txt","w")
6565
f.write(page.encode("utf-8"))
6666
f.close()
67-
f=open("adsa.txt","r")
67+
f=open("forextrastuff.txt","r")
6868
page=f.read()
6969
f.close()
7070
links=get_all_links(page)
@@ -81,10 +81,10 @@ def makeup(x,y):
8181
page=browser.page_source
8282
links=[]
8383

84-
f=open("adsa.txt","w")
84+
f=open("forextrastuff.txt","w")
8585
f.write(page.encode("utf-8"))
8686
f.close()
87-
f=open("adsa.txt","r")
87+
f=open("forextrastuff.txt","r")
8888
page=f.read()
8989
f.close()
9090
startpoint=page.find("it1")
@@ -109,10 +109,10 @@ def makeup(x,y):
109109

110110

111111

112-
f=open("adsa.txt","w")
112+
f=open("forextrastuff.txt","w")
113113
f.write(page.encode("utf-8"))
114114
f.close()
115-
f=open("adsa.txt","r")
115+
f=open("forextrastuff.txt","r")
116116
page=f.read()
117117
f.close()
118118
startpoint=page.find("it1")
@@ -132,27 +132,45 @@ def makeup(x,y):
132132
url=x
133133
browser.get(url)
134134

135-
time.sleep(2)
135+
time.sleep(1)
136136
page=browser.page_source
137137

138138

139139

140-
f=open("adsa.txt","w")
140+
f=open("forextrastuff.txt","w")
141141
f.write(page.encode("utf-8"))
142142
f.close()
143-
f=open("adsa.txt","r")
143+
f=open("forextrastuff.txt","r")
144144
page=f.read()
145145
f.close()
146-
startpoint=page.find("it1")
147-
endpoint=page.find('</div>',startpoint)
146+
148147

149148
page=page[startpoint:endpoint]
150149
qplinks=get_all_links(page)
151150
for y in qplinks:
152-
d="http://xampaperz.com/"+y
153-
cooler.append(d)
151+
if "paper" in y:
152+
d="http://xampaperz.com/"+y
153+
cooler.append(d)
154+
155+
for x in cooler:
156+
157+
browser= webdriver.Firefox()
158+
159+
browser.get(x)
160+
time.sleep(1)
161+
page=browser.page_source
162+
f=open("forextrastuff.txt","w")
163+
f.write(page.encode("utf-8"))
164+
f.close()
165+
f=open("forextrastuff.txt","r")
166+
page=f.read()
167+
f.close()
154168

155-
print cooler
169+
soup=BeautifulSoup(page)
170+
for a in soup.find_all('a', href=True):
171+
if "papers" in str(a['href']):
172+
url="http://xampaperz.com/"+str(a['href'])
173+
filename=wget.download(url)
156174

157175
#g=open("finalstuff.txt","w")
158176
#for x in final_stuff:

ques_paper/xdownloader.py

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
from selenium import webdriver
2+
from bs4 import BeautifulSoup
3+
import time
4+
import urllib
5+
import urllib2
6+
import wget
7+
new_links=[]  # shared accumulator: find_btech_links() appends into this list
# One Firefox window drives the crawl; it is reused by the fetch loops below.
# NOTE(review): it is never .quit() anywhere visible — confirm that is intended.
browser = webdriver.Firefox()
10+
def get_next_target(page):
    """Find the first '<a href="..."' anchor in *page*.

    Returns (url, index_of_closing_quote) on a hit, or (None, 0) when the
    page contains no further anchor markers.
    """
    marker = page.find('<a href="')
    if marker == -1:
        return None, 0
    first_quote = page.find('"', marker)
    second_quote = page.find('"', first_quote + 1)
    return page[first_quote + 1:second_quote], second_quote
18+
19+
def get_all_links(page):
    """Collect every href target in *page*, scanning left to right.

    Stops at the first missing or empty href, mirroring the sentinel
    behaviour of the single-anchor scanner above (inlined here).
    """
    found = []
    while True:
        anchor = page.find('<a href="')
        if anchor == -1:
            break
        open_q = page.find('"', anchor)
        close_q = page.find('"', open_q + 1)
        target = page[open_q + 1:close_q]
        if not target:
            break
        found.append(target)
        page = page[close_q:]
    return found
29+
30+
def find_btech_links(links):
    """Append an absolute URL for every 'cse.php' entry of *links* to the
    module-global new_links list, then return that same list."""
    matches = ["http://www.xampaperz.com/" + item
               for item in links if "cse.php" in item]
    new_links.extend(matches)
    return new_links
35+
final_stuff = []  # candidate image URLs accumulated by makeup()


def makeup(x, y):
    """Append guessed question-paper image URLs for years 2009-2013 to
    final_stuff.

    x -- a '....php' link; the paper code is everything before the three
         characters preceding '.php'.
    y -- a string whose character at index 25 is used as the semester digit.
         (NOTE(review): assumes len(y) > 25 — confirm against callers.)
    """
    cut = x.find(".php")
    code = x[:cut - 3]
    stream = "it"
    root = ("http://xampaperz.com/papers/img/btech/" + stream +
            "/sem" + str(y[25]) + "/")
    for i in range(9, 14):
        # 2009 needs the "200" prefix; 2010-2013 take "20" + two digits.
        year = ("200" + str(i)) if i == 9 else ("20" + str(i))
        final_stuff.append(root + year + code + ".jpg/")
        final_stuff.append(root + year + code + "2.jpg/")
55+
56+
def _snapshot_page(raw_page):
    """Write the browser page to a scratch file as UTF-8 and read it back.

    Preserves the original write-then-read roundtrip, which coerces the
    browser's unicode page source into an encoded byte string before the
    substring searches below.
    """
    with open("forextrastuff.txt", "w") as out:
        out.write(raw_page.encode("utf-8"))
    with open("forextrastuff.txt", "r") as src:
        return src.read()


# --- Stage 1: fetch the index page and collect the branch (cse.php) links ---
url = 'http://www.xampaperz.com/'
browser.get(url)
page = _snapshot_page(browser.page_source)
links = get_all_links(page)
new_links = find_btech_links(links)

# --- Stage 2: visit each branch page and harvest its anchors ---
for x in new_links:
    browser.get(x)
    page = _snapshot_page(browser.page_source)
    # Drop everything before the "it1" marker before extracting links.
    startpoint = page.find("it1")
    page = page[startpoint:]
    links = get_all_links(page)

# NOTE(review): `links` is overwritten on every iteration above, so only the
# LAST branch page's links are filtered here. Preserved as-is, but this looks
# like a latent bug — confirm whether each page's links should be filtered.
for x in links:
    if "1stsemcse" in x or "2ndsemcse" in x or "3rdsemcse" in x or "thsemcse" in x:
        new_links.append("http://www.xampaperz.com/" + x)

qplinks = []
new_links.pop(0)  # first entry is discarded; why is not clear from here — TODO confirm
cool = []

# --- Stage 3: visit each semester page and collect its paper-list links ---
for x in new_links:
    browser.get(x)
    time.sleep(2)
    page = _snapshot_page(browser.page_source)
    startpoint = page.find("it1")
    endpoint = page.find("<script src=", startpoint)
    page = page[startpoint:endpoint]
    qplinks = get_all_links(page)
    for y in qplinks:
        cool.append("http://xampaperz.com/" + y)

# --- Stage 4: visit each paper-list page and keep only "paper" links ---
cooler = []
for x in cool:
    browser.get(x)
    time.sleep(1)
    page = _snapshot_page(browser.page_source)
    # BUG FIX: the original sliced with startpoint/endpoint left over from the
    # previous loop's last iteration; recompute them for THIS page.
    startpoint = page.find("it1")
    endpoint = page.find("<script src=", startpoint)
    page = page[startpoint:endpoint]
    qplinks = get_all_links(page)
    for y in qplinks:
        if "paper" in y:
            cooler.append("http://xampaperz.com/" + y)

# --- Stage 5: open each paper page and download every "papers" image link ---
for x in cooler:
    # BUG FIX: the original opened a brand-new Firefox window for every URL
    # and never closed any of them; reuse the existing browser instead.
    browser.get(x)
    time.sleep(1)
    page = _snapshot_page(browser.page_source)
    soup = BeautifulSoup(page)
    for a in soup.find_all('a', href=True):
        if "papers" in str(a['href']):
            url = "http://xampaperz.com/" + str(a['href'])
            filename = wget.download(url)

#g=open("finalstuff.txt","w")
#for x in final_stuff:
    #filename=wget.download(x)
    #g.write(x+"\n")

0 commit comments

Comments
 (0)