File tree Expand file tree Collapse file tree 3 files changed +61
-0
lines changed
Week 4 Programs that Surf the Web Expand file tree Collapse file tree 3 files changed +61
-0
lines changed Original file line number Diff line number Diff line change
import urllib.request as ur
from bs4 import BeautifulSoup

# Script: download a page and report how many <span> tags it contains and
# the sum of the integers stored as their first child.

url = input('Enter the url to scrape - ')

# Use the response as a context manager so the connection is closed as soon
# as the body has been read.
with ur.urlopen(url) as response:
    html = response.read()
soup = BeautifulSoup(html, 'html.parser')

count_of_spans = 0
total = 0  # deliberately not called `sum`, which would shadow the builtin

# Calling the soup object with a tag name yields every matching tag.
for span in soup('span'):
    # The first child of each span is expected to be its numeric text;
    # int() raises ValueError if it is not.
    total += int(span.contents[0])
    count_of_spans += 1

print('Count ', count_of_spans)
print('Sum ', total)
Original file line number Diff line number Diff line change
import urllib.request as ur
from bs4 import BeautifulSoup

# Script: starting from a URL, repeatedly follow the link found at a fixed
# (1-based) position on each page, a given number of times, then print the
# URL that was reached last.

url = input('Enter URL: ')
repeat_count = int(input('Enter count: '))
position = int(input('Enter position: '))


def parse_html(url):
    """Fetch *url* and return all of its <a> tags in document order."""
    # Close the HTTP response as soon as the body has been read.
    with ur.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, 'html.parser')
    return soup('a')


for _ in range(repeat_count):
    print('Retrieving: ', url)
    tags = parse_html(url)

    # `position` is 1-based.  Without this check an out-of-range position
    # would leave `url` unchanged and the same page would be fetched again
    # on every remaining hop.
    if not 1 <= position <= len(tags):
        raise SystemExit(
            'Position %d is out of range: page has %d link(s)'
            % (position, len(tags))
        )

    href = tags[position - 1].get('href')
    # An <a> without an href gives None; stop here instead of passing None
    # to urlopen on the next hop.
    if href is None:
        raise SystemExit('Link at position %d has no href' % position)
    url = href

print('Last Url: ', url)
Original file line number Diff line number Diff line change
import urllib.request as ur
from bs4 import BeautifulSoup

# Script: download a page and print the href of every <a> tag on it.

url = input('Enter the url to scrape - ')
# Example input: http://www.dr-chuck.com

# Use the response as a context manager so the connection is closed as soon
# as the body has been read.
with ur.urlopen(url) as response:
    html = response.read()

# Parse the whole HTML document into a single soup object.
soup = BeautifulSoup(html, 'html.parser')

# Calling the soup object with a tag name yields every matching tag.
tags = soup('a')

for tag in tags:
    # .get() reads an attribute from the tag; the second argument is the
    # default returned when the tag has no href.  (It was previously passed
    # to print() by mistake, so every line ended with a literal " None".)
    print(tag.get('href', None))
You can’t perform that action at this time.
0 commit comments