| 
 | 1 | +__author__ = 'rithychhen'  | 
 | 2 | + | 
import codecs
from xml.sax.saxutils import escape

import requests
from bs4 import BeautifulSoup
 | 6 | + | 
 | 7 | + | 
# A YouTube API client that performs the API call and parses the returned content.
 | 9 | +class Youtube:  | 
 | 10 | + | 
 | 11 | +    #constructing the Youtube API object, default region to US.  | 
 | 12 | +    def __init__(self, region_id='US'):  | 
 | 13 | +        self.region_id = region_id  | 
 | 14 | +        self.most_popular_api = 'http://gdata.youtube.com/feeds/api/standardfeeds/%s/most_popular?v=2&max-results=11'  | 
 | 15 | + | 
 | 16 | +    #a simple function to set the regions.  | 
 | 17 | +    def set_region_id(self, region_id):  | 
 | 18 | +        self.region_id = region_id  | 
 | 19 | + | 
 | 20 | +    #a simple get request to youtube to get the most popular video by region.  | 
 | 21 | +    #using the request module to get a get request to youtube.  | 
 | 22 | +    #return value are status and content.  | 
 | 23 | +    def get_most_popular_by_region(self):  | 
 | 24 | +        api_string = self.most_popular_api % self.region_id  | 
 | 25 | +        response = requests.get(api_string)  | 
 | 26 | +        return response.status_code, response.content  | 
 | 27 | + | 
 | 28 | +    #parse the content from youtube using BeautifulSoup.  | 
 | 29 | +    def parse_content(self, content):  | 
 | 30 | +        parsed_content = []  | 
 | 31 | +        content = BeautifulSoup(content)  | 
 | 32 | +        content.prettify()  | 
 | 33 | +        for entry in content.find_all("entry"):  | 
 | 34 | +            parsed_content.append(self.gen_entry(entry))  | 
 | 35 | +        return parsed_content  | 
 | 36 | + | 
 | 37 | +    #generate a single video entry.  | 
 | 38 | +    def gen_entry(self, entry):  | 
 | 39 | +        entry_parsed = dict()  | 
 | 40 | +        title = entry.title.string  | 
 | 41 | +        video_id = entry.content.find("yt:videoid")  | 
 | 42 | +        link = "http://www.youtube.com/watch?v=%s" % video_id.string  | 
 | 43 | +        entry_parsed['title'] = "<a href='%s' target='_blank'>%s</a>" % (link, title)  | 
 | 44 | +        entry_parsed['duration'] = entry.content.find("yt:duration").get("seconds")  | 
 | 45 | +        author = entry.content.author.find('name')  | 
 | 46 | +        entry_parsed['Uploader'] = author.string  | 
 | 47 | +        entry_parsed['Published on'] = entry.published.string  | 
 | 48 | +        statistic = entry.content.find("yt:statistics")  | 
 | 49 | +        entry_parsed["View count"] = statistic.get("viewcount")  | 
 | 50 | +        #entry_parsed["avorite count"]= statistic.get("favoritecount")  | 
 | 51 | +        entry_parsed["Dislikes"] = statistic.get("numdislikes")  | 
 | 52 | +        entry_parsed["Likes"] = statistic.get("numlikes")  | 
 | 53 | +        return entry_parsed  | 
 | 54 | + | 
 | 55 | +    #write the result into html.  | 
 | 56 | +    def write_to_html(self, result):  | 
 | 57 | +        html = "<html><body><h2>Most popular videos by region</h2>"  | 
 | 58 | +        for item in result:  | 
 | 59 | +            #print item  | 
 | 60 | +            for name, content in item.iteritems():  | 
 | 61 | +                #print name  | 
 | 62 | +                html += "<h4>%s</h4>" % name  | 
 | 63 | +                if isinstance(content, str):  | 
 | 64 | +                    html += content + '<br/>'  | 
 | 65 | +                else:  | 
 | 66 | +                    html += "<ul>"  | 
 | 67 | +                    for sub_item in content:  | 
 | 68 | +                        temp_string = ''  | 
 | 69 | +                        for key, value in sub_item.iteritems():  | 
 | 70 | +                            if key == 'title':  | 
 | 71 | +                                html += "<li><b>%s</b>" % value  | 
 | 72 | +                            elif key == 'duration':  | 
 | 73 | +                                html += "<i style='margin-left: 25px'>%s seconds</i><br/>" % value  | 
 | 74 | +                            elif key == 'Dislikes' or key == "Likes":  | 
 | 75 | +                                continue  | 
 | 76 | +                            else:  | 
 | 77 | +                                temp_string += "%s : %s | " % (key, value)  | 
 | 78 | +                        html += temp_string + '</li>'  | 
 | 79 | +                    html += "</ul><br/>"  | 
 | 80 | +        html += "</body></html>"  | 
 | 81 | +        self.put_file(html)  | 
 | 82 | + | 
 | 83 | +    #write html to a file.  | 
 | 84 | +    def put_file(self, html):  | 
 | 85 | +        with codecs.open("most_popular.html", "w", "utf-8-sig") as f:  | 
 | 86 | +            f.write(html)  | 
 | 87 | +            f.close()  | 
 | 88 | + | 
0 commit comments