Skip to content

Commit e8c71e4

Browse files
committed
Updated
1 parent bc7c4b2 commit e8c71e4

File tree

6 files changed

+1619
-3
lines changed

6 files changed

+1619
-3
lines changed

ques_paper/adsa.txt

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
<html xmlns="http://www.w3.org/1999/xhtml"><head>
2+
<title>
3+
Xampaperz
4+
</title>
5+
<link type="image/ico" href="image/System-Black-Folder-icon.png" rel="icon" />
6+
<link href="ipu5.css" type="text/css" rel="stylesheet" />
7+
<script type="text/javascript" async="" src="https://d31qbv1cthcecs.cloudfront.net/atrk.js"></script><script src="jav.js"></script>
8+
<script src="js/bjqs-1.3.min.js"></script>
9+
10+
<!-- Start Alexa Certify Javascript --><script type="text/javascript">_atrk_opts = { atrk_acct:"IRlzk1agMW00Ge", domain:"xampaperz.com",dynamic: true};(function() { var as = document.createElement('script'); as.type = 'text/javascript'; as.async = true; as.src = "https://d31qbv1cthcecs.cloudfront.net/atrk.js"; var s = document.getElementsByTagName('script')[0];s.parentNode.insertBefore(as, s); })();</script><noscript>&lt;img src="https://d5nxst8fruw4z.cloudfront.net/atrk.gif?account=IRlzk1agMW00Ge" style="display:none" height="1" width="1" alt="" /&gt;</noscript><!-- End Alexa Certify Javascript -->
11+
</head>
12+
<body>
13+
<div class="boss">
14+
<div class="hea">
15+
<a href="index.php" class="e"><img id="first_change" src="image/home.png" /></a>
16+
<div class="inhea">
17+
<ul>
18+
   <li><b><a href="index.php">Home</a></b></li>        
19+
<li><a href="contact.php">Contact us</a></li>         
20+
<li><a href="privacy.php">Privacy policy</a></li>
21+
</ul>
22+
</div>
23+
<span class="f">
24+
<ul>
25+
<li><a target="_blank" href="https://twitter.com/xampapez"><img src="image/Twitter.png" id="twee" /></a></li>  
26+
<li><a target="_blank" href="https://www.facebook.com/xampaperz"><img src="image/facebook.png" id="fac" /></a></li>  
27+
<li><a target="_blank" href="http://www.youtube.com/channel/UCn55Wysvtic-TTpqOMqHQnw"><img src="image/youtube.gif" id="you" /></a></li>     
28+
</ul>
29+
</span>
30+
</div>
31+
<form id="myform" method="Post" action="submit.php">
32+
<input type="text" placeholder="search your college" id="fir" name="nth" /><input type="submit" id="sub" value="go" name="none" />
33+
</form>
34+
<div class="h"><a href="index.php">
35+
Xampaperz</a><b><h3>
36+
                                    Lets begin wisely this time</h3></b></div>
37+
38+
<div class="sec">
39+
<div class="in">
40+
<ul>
41+
   <li><a href="index.php">IPU home</a></li>
42+
<li><a href="uslls.php">USLLS</a></li>
43+
<li id="ipu_home"><a href="usict.php" style="color:#CCBDAD;">USICT</a></li>
44+
<li><a href="usbt.php">USBT</a></li>
45+
<li><a href="usms.php">USMS</a></li>
46+
<li><a href="about.php">About us</a></li>
47+
</ul></div>
48+
</div>
49+
<div class="top">
50+
<div class="nav">
51+
<ul>
52+
   <li id="logan4"><a href="ballb.php">BA-LLB</a></li> 
53+
<li><a href="bballb.php">BBA-LLB</a></li>                            
54+
<li><a href="btech.php">B.Tech</a></li>                                
55+
<li id="logan6"><a href="mtechbt.php">M.Tech(BT)</a></li>                         
56+
<li id="logan"><a href="mscem.php">Msc(EM)</a></li> 
57+
<li><a href="mscembc.php">Msc(EMBC)</a></li>         
58+
<li id="logan1"><a href="maenglish.php">MA(English)</a></li>    <br /><br />
59+
<li id="logan5"><a href="mbafm.php">MBA(MS FM)</a></li> 
60+
<li><a href="mba.php" id="logan33">MBA(MS)</a></li>            
61+
<li><a href="cse.php">CSE</a></li> /
62+
<li><a href="it.php">IT</a></li> /
63+
<li><a href="ece.php">ECE</a></li> /
64+
<li><a href="bt.php">BT</a></li> /
65+
<li><a href="ct.php">CT</a></li>          
66+
<li id="logan8"><a href="mtechct.php">M.Tech(CT)</a></li> /
67+
<li><a href="mtechnst.php">M.Tech(NST)</a></li>          
68+
<li id="logan2"><a href="mtechfpt.php">M.Tech(FPT)</a></li> /
69+
<li><a href="mtechep.php">M.Tech(EP)</a></li>          
70+
<li id="logan3"><a href="biochem.php">B.Tech(Bio chem)</a></li>
71+
</ul>
72+
</div>
73+
</div>
74+
<div class="it">
75+
<div id="topit"><a href="btech.php">B.TECH</a>  &gt;&gt; <a href="cse.php">CSE</a> &gt; <a href="5thsemcse.php"><span style="font-family:OldPress;font-size:25px;">5</span>TH SEMESTER</a> &gt; OBJECT ORIENTED SOFTWARE ENGG.</div>
76+
<img id="itimg" src="image/it.png" />
77+
<div class="it1">
78+
<a href=""><img id="itimg2" src="image/it%202.png" />
79+
</a><a href="no_paper.php?img1=img/btech/it/sem5/2011objectorientedsoftwareengineering.jpg&amp;img2=img/btech/it/sem5/2011objectorientedsoftwareengineering2.jpg&amp;sub=Object%20Oriented%20Software&amp;sub1=objectorientedsoftwareengineeringcse&amp;sem=5th%20semester&amp;semreal=5thsemcse&amp;course=B.tech&amp;coursereal=btech&amp;stream=CSE&amp;year=2011"><img id="te" src="image/year/2011.png" /></a>
80+
<a href="paper.php?img1=img/btech/it/sem5/2012objectorientedsoftwareengineering.jpg&amp;img2=img/btech/it/sem5/2012objectorientedsoftwareengineering2.jpg&amp;sub=Object%20Oriented%20Software&amp;sub1=objectorientedsoftwareengineeringcse&amp;sem=5th%20semester&amp;semreal=5thsemcse&amp;course=B.tech&amp;coursereal=btech&amp;stream=CSE&amp;year=2012"><img id="tt" src="image/year/2012.png" /></a>
81+
<a href="paper.php?img1=img/btech/it/sem5/2013objectorientedsoftwareengineering.jpg&amp;img2=img/btech/it/sem5/2013objectorientedsoftwareengineering2.jpg&amp;sub=Object%20Oriented%20Software&amp;sub1=objectorientedsoftwareengineeringcse&amp;sem=5th%20semester&amp;semreal=5thsemcse&amp;course=B.tech&amp;coursereal=btech&amp;stream=CSE&amp;year=2013"><img id="tth" src="image/year/2013.png" /></a>
82+
<a href="uppaper.php"><img id="tfo" src="image/year/2014.png" /></a>
83+
<a href="zipoosoftware.php"><img id="zip" alt="all papers of this subject zipped" src="image/year/zip.png" /></a>
84+
<a href="paper2.php?img1=img/btech/it/sem5/2008objectorientedsoftwareengineering.jpg&amp;img2=img/btech/it/sem5/2008objectorientedsoftwareengineering2.jpg&amp;sub=Object%20Oriented%20Software&amp;sub1=objectorientedsoftwareengineeringcse&amp;sem=5th%20semester&amp;semreal=5thsemcse&amp;course=B.tech&amp;coursereal=btech&amp;stream=CSE&amp;year=2008"><img id="tei2" src="image/year/2008.png" /></a>
85+
<a href="paper2.php?img1=img/btech/it/sem5/2009objectorientedsoftwareengineering.jpg&amp;img2=img/btech/it/sem5/2009objectorientedsoftwareengineering2.jpg&amp;sub=Object%20Oriented%20Software&amp;sub1=objectorientedsoftwareengineeringcse&amp;sem=5th%20semester&amp;semreal=5thsemcse&amp;course=B.tech&amp;coursereal=btech&amp;stream=CSE&amp;year=2009"><img id="tn" src="image/year/2009.png" /></a>
86+
<a href="paper2.php?img1=img/btech/it/sem5/2010objectorientedsoftwareengineering.jpg&amp;img2=img/btech/it/sem5/2010objectorientedsoftwareengineering2.jpg&amp;sub=Object%20Oriented%20Software&amp;sub1=objectorientedsoftwareengineeringcse&amp;sem=5th%20semester&amp;semreal=5thsemcse&amp;course=B.tech&amp;coursereal=btech&amp;stream=CSE&amp;year=2010"><img id="tten" src="image/year/2010.png" /></a>
87+
88+
<ul>
89+
<li><a href="objectorientedsoftwareengineeringcse.php" style="color:#4786b2;">SOFTWARE ENGG.</a></li>
90+
<li><a href="theoryofcomputationcse.php">THEORY OF <br />COMPUTATION</a></li>
91+
<li><a href="analoganddigitalcommunicationcse.php">ANALOG AND <br />DIGITAL COMMUN..</a></li>
92+
<li><a href="digitaldesignusingvhdlcse.php">DIGITAL DESIGN <br />USING VHDL</a></li>
93+
<li><a href="computerarchitecturecse.php">COMPUTER <br /> ARCHITECTURE</a></li>
94+
<li><a href="digitalsignalprocessingcse.php">DIGITAL SIGNAL <br /> PROCESSING</a></li>
95+
</ul>
96+
</div>
97+
</div>
98+
<div class="end">
99+
<div class="end2">
100+
<ul>
101+
<li style="margin-left:-15px">© Copyright Xampaperz Ltd 2013</li>           
102+
<li><a href="cookie.php">Cookie Policy</a></li>
103+
<li><a href="privacy.php">Privacy Policy</a></li>
104+
<li><a href="term.php">Terms &amp; Conditions</a></li>
105+
<li><a href="contact.php">Contacting Us</a></li>
106+
</ul>
107+
</div>
108+
</div>
109+
110+
111+
112+
</div></body></html>

ques_paper/dry.py

Lines changed: 117 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,129 @@
11
from selenium import webdriver
22
from bs4 import BeautifulSoup
33
import time
4-
4+
import urllib2
5+
import wget
6+
new_links=[]
57

68
browser = webdriver.Firefox()
9+
def get_next_target(page):
    """Locate the first ``<a href="...">`` in ``page``.

    Returns a ``(url, end_quote)`` tuple where ``url`` is the text between
    the quotes of the href attribute (possibly the empty string) and
    ``end_quote`` is the index of the closing quote, from which the caller
    can resume scanning.

    Returns ``(None, 0)`` when there is no further anchor, or when the href
    value is never closed by a quote (previously that case produced a URL
    with its last character cut off and a resume offset of -1).
    """
    start_link = page.find('<a href="')
    if start_link == -1:
        return None, 0
    # The first quote at or after the match is the one that opens the value.
    start_quote = page.find('"', start_link)
    end_quote = page.find('"', start_quote + 1)
    if end_quote == -1:
        # Unterminated attribute: nothing trustworthy to extract.
        return None, 0
    url = page[start_quote + 1:end_quote]
    return url, end_quote


def get_all_links(page):
    """Return every non-empty href value of ``<a href="...">`` in ``page``.

    Links are returned in document order. Anchors with an empty href
    (``<a href="">``) are skipped; they no longer end the scan, so links
    that follow such an anchor are still collected.
    """
    links = []
    while True:
        url, endpos = get_next_target(page)
        if url is None:
            # Only None means "no more anchors"; "" is just an empty href.
            break
        if url:
            links.append(url)
        # Continue scanning from the closing quote of the href just handled.
        page = page[endpos:]
    return links
28+
29+
def find_btech_links(links):
    """Collect the links that contain "cse.php" as absolute site URLs.

    Each matching entry of ``links`` is prefixed with the site root and
    appended to the module-level ``new_links`` list, which is also the
    value returned (the same list object, not a copy).
    """
    site_root = "http://www.xampaperz.com/"
    new_links.extend(site_root + link for link in links if "cse.php" in link)
    return new_links
34+
# Accumulates every candidate image URL produced by makeup().
final_stuff=[]
def makeup(x,y):
    """Append candidate question-paper image URLs for one link to final_stuff.

    x -- a link such as "subject.php"; everything from the first ".php"
         onward is dropped to obtain the name used in the image file names.
         If ".php" does not occur, x is used unchanged (previously its last
         character was silently cut off because find() returned -1).
    y -- the URL of the page the link was found on. The character at index
         25 is inserted after ".../cse/sem"; for URLs that start with
         "http://www.xampaperz.com/" (25 characters) that is the first
         character of the page name -- presumably the semester digit,
         TODO confirm.

    For each year 2009 through 2013 two URLs are appended, in order: the
    "<year><name>.jpg/" image and the "<year><name>2.jpg/" image.
    Returns None; raises IndexError if y has fewer than 26 characters.
    """
    pos = x.find(".php")
    if pos != -1:
        x = x[:pos]

    base = "http://xampaperz.com/papers/img/btech/cse/sem" + str(y[25]) + "/"
    for i in range(9, 14):
        # Two-digit, zero-padded suffix gives 2009, 2010, ..., 2013.
        stem = base + "20%02d" % i + x
        # NOTE(review): the trailing "/" after ".jpg" is kept from the
        # original strings -- confirm the server really expects it.
        final_stuff.append(stem + ".jpg/")
        final_stuff.append(stem + "2.jpg/")
54+
55+
url = 'http://www.xampaperz.com/'
56+
browser.get(url)
57+
58+
59+
page=browser.page_source
60+
61+
links=[]
62+
63+
f=open("adsa.txt","w")
64+
f.write(page.encode("utf-8"))
65+
f.close()
66+
f=open("adsa.txt","r")
67+
page=f.read()
68+
f.close()
69+
links=get_all_links(page)
70+
new_links=find_btech_links(links)
71+
72+
73+
74+
for x in new_links:
75+
url=x
776

8-
url = 'http://www.xampaperz.com/1stsemcse.php'
977
browser.get(url)
10-
time.sleep(10)
78+
1179

1280
page=browser.page_source
81+
links=[]
82+
1383
f=open("adsa.txt","w")
1484
f.write(page.encode("utf-8"))
1585
f.close()
86+
f=open("adsa.txt","r")
87+
page=f.read()
88+
f.close()
89+
startpoint=page.find("it1")
90+
91+
page=page[startpoint:]
92+
93+
links=get_all_links(page)
94+
95+
for x in links:
96+
if "1stsemcse" in x or "2ndsemcse" in x or "3rdsemcse" in x or "thsemcse" in x:
97+
new_links.append("http://www.xampaperz.com/"+ x)
98+
qplinks=[]
99+
new_links.pop(0)
100+
for x in new_links:
101+
102+
url=x
103+
browser.get(url)
104+
105+
time.sleep(2)
106+
page=browser.page_source
107+
108+
109+
110+
f=open("adsa.txt","w")
111+
f.write(page.encode("utf-8"))
112+
f.close()
113+
f=open("adsa.txt","r")
114+
page=f.read()
115+
f.close()
116+
startpoint=page.find("it1")
117+
endpoint=page.find("<script src=",startpoint)
118+
119+
page=page[startpoint:endpoint]
120+
qplinks=get_all_links(page)
121+
122+
for y in qplinks:
123+
y= makeup(y,x)
124+
125+
browser.close()
126+
# Download every generated image URL and record the URLs in finalstuff.txt.
# urllib is imported here because the file only imports urllib2 and wget;
# urllib.urlretrieve is the Python 2 location of this function.
import urllib

with open("finalstuff.txt", "w") as g:
    for x in final_stuff:
        # The original referenced an undefined name `save_as`; use the last
        # path component of the URL (ignoring the trailing "/") as the local
        # file name, e.g. "2009subject.jpg".
        save_as = x.rstrip("/").rsplit("/", 1)[-1]
        urllib.urlretrieve(x, filename=save_as)
        g.write(x+"\n")

0 commit comments

Comments
 (0)