File tree Expand file tree Collapse file tree 1 file changed +33
-0
lines changed
Expand file tree Collapse file tree 1 file changed +33
-0
lines changed Original file line number Diff line number Diff line change 1+ # -*- coding: cp1252 -*-
2+ """
3+ Page Scraper - Create an application which connects to a
4+ site and pulls out all links, or images, and saves them to
5+ a list. Optional: Organize the indexed content and don't
6+ allow duplicates. Have it put the results into an easily
7+ searchable index file.
8+ """
9+
10+ import urllib2
11+ from bs4 import BeautifulSoup
12+
13+
def print_list(stuff):
    """Print every entry of *stuff*, then a separator line.

    Entries are joined with the newline-plus-space delimiter and written
    in one go, followed by a row of '=' characters.

    ``None`` entries are skipped: ``str.join`` raises ``TypeError`` on
    non-string items, and the caller builds its lists with
    ``tag.get(...)``, which yields ``None`` for tags lacking the
    requested attribute.

    Args:
        stuff: iterable of strings (``None`` items are ignored).
    """
    entries = [item for item in stuff if item is not None]
    # Single-argument print(...) behaves the same under the Python 2
    # print statement and the Python 3 print function.
    print('\n '.join(entries))
    print('\n ====================\n ')
17+
if __name__ == '__main__':
    # Interactive entry point (Python 2: relies on raw_input and urllib2).
    # Asks for a URL and what to extract, downloads the page, and prints
    # the link targets and/or image sources found in it.

    url = raw_input('Enter a URL: ')

    # Use int(raw_input(...)) rather than input(): in Python 2, input()
    # evaluates the typed text as a Python expression, which lets the
    # user run arbitrary code and fails confusingly on non-numeric text.
    menu = 'What to scrape?\n 1. Links\n 2. Images\n 3. Both\n '
    try:
        choice = int(raw_input(menu))
    except ValueError:
        choice = None
    # Reject bad choices before touching the network instead of
    # downloading the page and then silently printing nothing.
    if choice not in (1, 2, 3):
        raise SystemExit('Please enter 1, 2 or 3.')

    # Close the HTTP response even if reading it fails.
    response = urllib2.urlopen(url)
    try:
        html = response.read()
    finally:
        response.close()

    # Name the parser explicitly so results do not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(html, 'html.parser')

    if choice in (1, 3):
        # Anchors without an href yield None from .get(); drop them so
        # only real link targets are printed.
        hrefs = [link.get('href') for link in soup.find_all('a')]
        urls = [href for href in hrefs if href is not None]
        print('URLs:')
        print_list(urls)
    if choice in (2, 3):
        # .get('src') instead of ['src']: an <img> without a src
        # attribute would otherwise raise KeyError and abort the run.
        sources = [image.get('src') for image in soup.find_all('img')]
        images = [src for src in sources if src is not None]
        print('Images:')
        print_list(images)
You can't perform that action at this time.
0 commit comments