
Commit 748649f

Commit message: Not much
1 parent a9df30a commit 748649f

File tree: 9 files changed, +5451 −3 lines

Templates.txt

Lines changed: 255 additions & 0 deletions
Large diffs are not rendered by default.

scraping-py/final.txt

Lines changed: 5094 additions & 0 deletions
Large diffs are not rendered by default.

scraping-py/geeksforgeeks.py

Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
from BeautifulSoup import BeautifulSoup
import urllib2
import urllib
import sys

Soup = BeautifulSoup
links = []
url = "http://www.geeksforgeeks.org/data-structures/"

# Fetch the index page and decode it as UTF-8.
f = urllib2.urlopen(url)
page = f.read()
f.close()
page = unicode(page, 'utf-8')
soup = Soup(page)

# Collect every link that appears from the first linked-list article onwards.
j = 0
for a in soup.findAll('a', href=True):
    if "http://geeksquiz.com/linked-list-set-1-introduction/" in a['href']:
        j = 1
    if j == 1:
        links.append(a['href'])

# Drop in-page anchor links; building a new list avoids popping while iterating.
links = [x for x in links if "#" not in x]
print links

# Download each article, keep the markup between the header and the scripts,
# and append its plain text to final.txt.
final = open("final.txt", "w")
for x in links:
    print x
    f = urllib.urlopen(x)
    page = f.read()
    f.close()
    page = unicode(page, 'utf-8')
    page = page[page.find("<header class="):page.find("<script async src=")]
    soup = Soup(page)
    final.write(soup.getText().encode('utf-8'))
final.close()

scraping-py/p.out

15.2 KB
Binary file not shown.

scraping-py/test.cpp

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
#include <iostream>
using namespace std;

// Read two integers and print their sum.
int main(int argc, char const *argv[]) {
    int a;
    int b;
    cout << "Enter a: ";
    cin >> a;
    cout << "Enter b: ";
    cin >> b;
    cout << "Sum = a + b = " << a + b;
    cout << endl;
    cout << "This";
    cout << endl;
    return 0;
}

scraping-py/testingscript.py

Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
import wget
import urllib2
from BeautifulSoup import BeautifulSoup

Soup = BeautifulSoup

# Fetch the course page and parse it.
url = "http://stim.ee.uh.edu/education/ece-3340-numerical-methods/"
f = urllib2.urlopen(url)
page = f.read()
f.close()
soup = Soup(page)

# Download every Bitbucket-hosted file linked on the page, skipping the .ppt slides.
for link in soup.findAll('a', href=True):
    if "bitbucket" in link['href'] and "ppt" not in link['href']:
        filename = wget.download(link['href'])

template.py

Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
from BeautifulSoup import BeautifulSoup
import urllib2
import wget
import requests

Soup = BeautifulSoup

# Markers that bracket the article body on the page.
str1 = "Covering the nitty-gritties of C++ templates"
str2 = "This article, along with any associated source code and files, is licensed under"
url = "http://www.codeproject.com/Articles/257589/An-Idiots-Guide-to-Cplusplus-Templates-Part"

f = urllib2.urlopen(url)
page = f.read()
f.close()

# Keep only the markup between the two markers, then strip the HTML tags.
page = page[page.find(str1):page.find(str2)]
soup = Soup(page)
text = soup.getText()

f = open("Templates.txt", "w")
f.write(text.encode("utf-8"))
f.close()

testingscript.py

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
import wget
import urllib2
from BeautifulSoup import BeautifulSoup

Soup = BeautifulSoup

# Fetch the course page and parse it.
url = "http://stim.ee.uh.edu/education/ece-3340-numerical-methods/"
f = urllib2.urlopen(url)
page = f.read()
f.close()
soup = Soup(page)

# Print every hyperlink found on the page.
for link in soup.findAll('a', href=True):
    print link['href']

x.py

Lines changed: 5 additions & 3 deletions
@@ -4,7 +4,7 @@
 import requests
 Soup=BeautifulSoup
 import BeautifulSoup
-url="http://xampaperz.com/xampaperz/xampaperz/MAIT/xampaperz/subject.php?id=1&semkey=8&sem=8thsemcse&semester=8th&main=xampaerl_cse&stream_get=Computer"+ "%20"+"Science"
+url="http://xampaperz.com/xampaperz/xampaperz/MAIT/xampaperz/subject.php?id=1&semkey=7&sem=7thsemcse&semester=7th&main=xampaerl_cse&stream_get=Computer"+ "%20"+"Science"
 f=urllib2.urlopen(url)
 page=f.read()
 f.close()
@@ -35,15 +35,17 @@
     paper.append("http:xampaperz/xampaperz/MAIT/xampaperz/" + a['href'])
 final=[]
 for a in paper:
-    a= a[a.find("&paper_url")+len("&paper_url")+1:a.find(".jpg")] +".jpg"
+    a= a[a.find("&paper_url")+len("&paper_url")+1:a.find(".jpg")] +"2.jpg"
+    b= a[a.find("&paper_url")+len("&paper_url")+1:a.find(".jpg")] +".jpg"
     print a
     final.append(a)
+    final.append(b)

 i=0
 for x in final:
     response = requests.get(x)
     if response.status_code == 200:
-        f = open("8th Sem final" + str(i)+ ".jpg", 'wb')
+        f = open("7th Sem Part 2 final" + str(i)+ ".jpg", 'wb')
         f.write(response.content)
         f.close()
         i=i+1

0 commit comments
