|
import sys
import urllib2

import mechanize
from bs4 import BeautifulSoup
# NOTE: this script targets Python 2 (raw_input, urllib2). Every print below
# is a single-argument print(...) call so the syntax parses the same way on
# Python 2 and Python 3.

# Root of the site; search results link to titles with site-relative hrefs.
IMDB_BASE = 'http://www.imdb.com'
SEARCH_URL = IMDB_BASE + '/search/title'

# User-agent header sent with the search-form requests.
USER_AGENT = 'Mozilla/4.0 (compatible; MSIE 5.0; Windows 98;)'

# (label printed before the value, itemprop attribute looked up on the page)
DETAIL_FIELDS = (
    ('title of the movie: ', 'name'),
    ('timerun: ', 'duration'),
    ('genre: ', 'genre'),
    ('current IMDB rating:', 'ratingValue'),
    ('summary:', 'description'),
)


def absolute_url(href):
    """Return the absolute IMDb URL for *href*.

    Site-relative hrefs are prefixed with IMDB_BASE; hrefs that already
    carry an http(s) scheme are returned unchanged.
    """
    if href.startswith(('http://', 'https://')):
        return href
    return IMDB_BASE + href


def field_text(soup, itemprop):
    """Return the text of the first tag with the given *itemprop*, or None.

    Returning None instead of raising lets the caller keep printing the
    remaining fields when the page lacks one of them.
    """
    tag = soup.find(itemprop=itemprop)
    if tag is None:
        return None
    return tag.get_text()


def search_titles(title):
    """Submit the IMDb title search form for *title*.

    Returns the result page parsed with the 'html5lib' parser.
    """
    browser = mechanize.Browser()
    # Do not fetch or obey robots.txt.
    browser.set_handle_robots(False)
    browser.addheaders = [('User-agent', USER_AGENT)]

    browser.open(SEARCH_URL)
    # Form index 1 on the search page -- presumably the title search form;
    # confirm against the live page, since the index breaks if the layout
    # changes.
    browser.select_form(nr=1)
    browser['title'] = title
    # Tick the item named "feature" in the first checkbox control
    # (presumably the "Feature Film" title type -- TODO confirm).
    browser.find_control(type="checkbox", nr=0).get("feature").selected = True

    response = browser.submit()
    return BeautifulSoup(response.read(), 'html5lib')


def first_result_link(results):
    """Return the first <a> inside the first <td class="title">, or None."""
    cell = results.find('td', {'class': 'title'})
    if cell is None:
        return None
    return cell.find('a')


def print_details(url):
    """Fetch the title page at *url* and print each DETAIL_FIELDS entry."""
    page = urllib2.urlopen(url)
    try:
        soup = BeautifulSoup(page.read(), 'html.parser')
    finally:
        # Release the HTTP connection even if reading or parsing fails.
        page.close()

    for label, itemprop in DETAIL_FIELDS:
        print(label)
        text = field_text(soup, itemprop)
        print(text if text is not None else '(not available)')


def main():
    """Prompt for a title, look it up on IMDb and print its details."""
    title = raw_input("enter title ")
    link = first_result_link(search_titles(title))
    href = link.get('href') if link is not None else None
    if not href:
        # Diagnostics go to stderr so stdout carries only the movie data.
        sys.stderr.write('no matching title found for %r\n' % title)
        return

    print('%s => %s' % (link.text.strip(), href))
    url = absolute_url(href)
    print(url)
    print_details(url)


if __name__ == '__main__':
    main()
0 commit comments