Skip to content

Commit 46f6dc4

Browse files
committed
Rithy Chhen: Assignment 3
1 parent 83e43f6 commit 46f6dc4

File tree

3 files changed

+157
-0
lines changed

3 files changed

+157
-0
lines changed

assignments/session03/README.txt

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
******************************************************************
2+
Session:3 Assignments / Rithy Chhen
3+
yt.py - Youtube Mashup
4+
*****************************************************************
5+
6+
Brief Description :
7+
I am a big fan of YouTube, so I decided to do this assignment using the YouTube API. My program searches for
8+
the most popular videos on YouTube by region. Each video is tagged with its duration, uploader, published date and number of views.
9+
10+
Things you should know:
11+
1) YouTube API.
12+
The YouTube Data API allows me to grab the data from youtube without authenticating.
13+
2) Knowing which regions are supported by the YouTube Data API:
14+
I spent a lot of time searching for the regions supported by YouTube.
15+
I listed over 20 different regions.
16+
3) In order to run yt.py, a couple of modules need to be installed:
17+
- requests
18+
- BeautifulSoup
19+
- codecs (part of the Python standard library, so no separate installation is needed)
20+
21+
Source Code:
22+
When everything is ready, you can simply run yt.py (python yt.py). It will take a couple of seconds to complete because I have over 20 different regions
23+
listed.
24+
25+
Output Example:
26+
Once the program is completed, a new file named most_popular.html should be created in the directory you ran it from (the session03 folder). Open most_popular.html
27+
with your favorite browser, and you should see a list of popular videos separated by region.

assignments/session03/youtube.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
__author__ = 'rithychhen'
2+
3+
import requests
4+
import codecs
5+
from bs4 import BeautifulSoup
6+
7+
8+
#A YouTube API class that makes the API call and parses the returned content.
9+
class Youtube:
    """Fetch a region's "most popular" YouTube feed and render it as HTML.

    Typical use: set a region, call ``get_most_popular_by_region()``, pass
    the body to ``parse_content()``, collect the per-region results and hand
    them to ``write_to_html()``.

    NOTE(review): the URL below is the legacy GData v2 endpoint -- confirm
    that it is still served before relying on this class.
    """

    # Keys of a parsed entry that get special treatment when rendering:
    # 'title' and 'duration' are laid out explicitly, 'Dislikes' and 'Likes'
    # are collected by gen_entry() but intentionally left out of the page.
    _RESERVED_KEYS = ('title', 'duration', 'Dislikes', 'Likes')

    def __init__(self, region_id='US', timeout=10):
        """Create the client.

        region_id -- two-letter region code, defaults to 'US'.
        timeout   -- seconds to wait for the HTTP request before giving up.
        """
        self.region_id = region_id
        self.timeout = timeout
        self.most_popular_api = 'http://gdata.youtube.com/feeds/api/standardfeeds/%s/most_popular?v=2&max-results=11'

    def set_region_id(self, region_id):
        """Select the region used by the next feed request."""
        self.region_id = region_id

    def get_most_popular_by_region(self):
        """Request the most popular feed for the current region.

        Returns a ``(status_code, content)`` tuple taken from the response.
        Transport failures (including a timeout) propagate as the exception
        raised by ``requests.get``.
        """
        api_string = self.most_popular_api % self.region_id
        # A timeout keeps one unresponsive request from hanging the run.
        response = requests.get(api_string, timeout=self.timeout)
        return response.status_code, response.content

    def parse_content(self, content):
        """Parse a feed body and return a list with one dict per <entry>."""
        # No parser is named here, so BeautifulSoup picks whichever one is
        # installed. NOTE(review): the lookups in gen_entry() go through
        # ``entry.content`` and may depend on that choice -- confirm before
        # pinning a parser.
        soup = BeautifulSoup(content)
        return [self.gen_entry(entry) for entry in soup.find_all("entry")]

    def gen_entry(self, entry):
        """Turn one parsed <entry> element into a dict describing the video.

        The 'title' value is a ready-to-embed HTML link (its text and URL
        are escaped here); every other value is plain text or None.
        """
        entry_parsed = dict()
        title = self._escape_html(entry.title.string)
        video_id = entry.content.find("yt:videoid")
        link = self._escape_html("http://www.youtube.com/watch?v=%s" % video_id.string)
        entry_parsed['title'] = "<a href='%s' target='_blank'>%s</a>" % (link, title)
        entry_parsed['duration'] = self._attribute(entry.content.find("yt:duration"), "seconds")
        author = entry.content.author.find('name')
        entry_parsed['Uploader'] = author.string
        entry_parsed['Published on'] = entry.published.string
        # The statistics element may be absent; _attribute() then yields None
        # instead of raising AttributeError.
        statistic = entry.content.find("yt:statistics")
        entry_parsed["View count"] = self._attribute(statistic, "viewcount")
        entry_parsed["Dislikes"] = self._attribute(statistic, "numdislikes")
        entry_parsed["Likes"] = self._attribute(statistic, "numlikes")
        return entry_parsed

    def write_to_html(self, result):
        """Render ``result`` (see render_html) and save it via put_file()."""
        self.put_file(self.render_html(result))

    def render_html(self, result):
        """Build the report page and return it as a string.

        ``result`` is a list of ``{region name: content}`` dicts. ``content``
        is either an error message (a str, written as-is) or a list of entry
        dicts as produced by gen_entry(). Region names and error messages
        come from the caller and are not escaped.
        """
        parts = ["<html><body><h2>Most popular videos by region</h2>"]
        for item in result:
            # .items() works on both Python 2 and Python 3.
            for name, content in item.items():
                parts.append("<h4>%s</h4>" % name)
                if isinstance(content, str):
                    parts.append(content + '<br/>')
                    continue
                parts.append("<ul>")
                for sub_item in content:
                    parts.append(self._render_entry(sub_item))
                parts.append("</ul><br/>")
        parts.append("</body></html>")
        return ''.join(parts)

    def _render_entry(self, sub_item):
        """Return the <li> markup for a single entry dict."""
        # Title and duration are emitted explicitly and first, so the markup
        # is well formed whatever order the dict iterates its keys in.
        parts = ["<li>"]
        if 'title' in sub_item:
            # Already HTML (built and escaped by gen_entry).
            parts.append("<b>%s</b>" % sub_item['title'])
        if 'duration' in sub_item:
            parts.append("<i style='margin-left: 25px'>%s seconds</i><br/>"
                         % self._escape_html(sub_item['duration']))
        for key, value in sub_item.items():
            if key in self._RESERVED_KEYS:
                continue
            parts.append("%s : %s | " % (self._escape_html(key), self._escape_html(value)))
        parts.append('</li>')
        return ''.join(parts)

    @staticmethod
    def _attribute(tag, name):
        """Return ``tag.get(name)``, or None when the tag itself is missing."""
        if tag is None:
            return None
        return tag.get(name)

    @staticmethod
    def _escape_html(value):
        """Return ``value`` as text with HTML special characters escaped."""
        text = '%s' % (value,)
        # '&' must be replaced first so the other entities are not re-escaped.
        for raw, safe in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;'),
                          ('"', '&quot;'), ("'", '&#39;')):
            text = text.replace(raw, safe)
        return text

    def put_file(self, html):
        """Write ``html`` to most_popular.html in the working directory."""
        # The with-statement closes the file; no explicit close() is needed.
        with codecs.open("most_popular.html", "w", "utf-8-sig") as f:
            f.write(html)
88+

assignments/session03/yt.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
__author__ = 'rithychhen'
2+
3+
#import the Youtube class.
4+
from youtube import Youtube
5+
6+
#function to list the regions that I am interested in.
7+
def generate_supported_region_ids():
    """Return a new dict mapping each region's display name to its region code."""
    name_code_pairs = (
        ('Argentina', 'AR'),
        ('Australia', 'AU'),
        ('Brazil', 'BR'),
        ('Canada', 'CA'),
        ('Egypt', 'EG'),
        ('France', 'FR'),
        ('Germany', 'DE'),
        ('Great Britain', 'GB'),
        ('Hong Kong', 'HK'),
        ('India', 'IN'),
        ('Ireland', 'IE'),
        ('Italy', 'IT'),
        ('Japan', 'JP'),
        ('Malaysia', 'MY'),
        ('Mexico', 'MX'),
        ('Netherlands', 'NL'),
        ('New Zealand', 'NZ'),
        ('Russia', 'RU'),
        ('Saudi Arabia', 'SA'),
        ('Singapore', 'SG'),
        ('South Africa', 'ZA'),
        ('South Korea', 'KR'),
        ('Spain', 'ES'),
        ('Sweden', 'SE'),
        ('Switzerland', 'CH'),
        ('Taiwan', 'TW'),
        ('United States', 'US'),
    )
    # A fresh dict is built on every call, so callers may mutate the result.
    return dict(name_code_pairs)
16+
17+
#main
18+
def main():
    """Build most_popular.html with the most popular videos of every region.

    Each supported region is queried in turn. A region whose request does not
    come back with HTTP status 200 gets an error message in place of a video
    list.
    """
    #instantiate the Youtube class. This defaults the region to US and sets the api string.
    youtube = Youtube()
    #get a dictionary of region ids.
    regions_ids = generate_supported_region_ids()
    #a list to store the result, one single-key dictionary per region.
    result = []
    #go through all regions in alphabetical order so the report is stable from
    #run to run; items() (unlike iteritems()) works on Python 2 and Python 3.
    for name, region_id in sorted(regions_ids.items()):
        #first set the region to region id.
        youtube.set_region_id(region_id)
        #then do an api call to youtube to get the most popular videos.
        status, body = youtube.get_most_popular_by_region()
        if status != 200:
            #response from youtube is not okay, so store the error instead.
            content = 'Unable to get data for %s' % name
        else:
            #okay response. parse youtube content.
            content = youtube.parse_content(body)
        #finally add the region's dictionary to the list.
        result.append({name: content})
    #lastly write the list to the html file.
    youtube.write_to_html(result)


#main
if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)