1- import requests
1+ from requests_html import HTMLSession
22from bs4 import BeautifulSoup as bs
33
4+ # Module-level requests-html session; get_video_info() calls session.get() on it to download each page.
5+ session = HTMLSession ()
6+
47
def _digits_to_int(text):
    """Return the integer formed by the digit characters of *text*.

    Every non-digit character (thousands separators, words such as
    "views" or "likes") is dropped before conversion. Raises ValueError
    when *text* contains no digits at all.
    """
    return int("".join(c for c in text if c.isdigit()))


def get_video_info(url):
    """Scrape a YouTube watch page and return its metadata.

    The page is fetched with the module-level ``session``, its JavaScript
    is executed, and the rendered markup is parsed with BeautifulSoup.

    Args:
        url: Address of the video page to scrape.

    Returns:
        A dict with the keys ``title``, ``views`` (int), ``description``,
        ``date_published``, ``duration``, ``tags`` (comma-separated str),
        ``likes`` (int), ``dislikes`` (int) and ``channel`` (a dict with
        ``name``, ``url`` and ``subscribers``).

    Raises:
        AttributeError: If an expected element is missing from the page
            (``soup.find`` returns ``None`` in that case).
        IndexError: If fewer than two like/dislike labels are found.
        ValueError: If a counter text contains no digits.
    """
    # download HTML code
    response = session.get(url)
    # execute JavaScript so the dynamically generated markup is available
    response.html.render(sleep=1)
    # create beautiful soup object to parse the rendered HTML
    soup = bs(response.html.html, "html.parser")

    result = {}
    # video title
    result["title"] = soup.find("h1").text.strip()
    # video views (converted to integer)
    result["views"] = _digits_to_int(
        soup.find("span", attrs={"class": "view-count"}).text
    )
    # video description
    result["description"] = soup.find(
        "yt-formatted-string", {"class": "content"}
    ).text
    # date published; the first character is dropped
    # (presumably a leading separator symbol -- TODO confirm)
    result["date_published"] = soup.find("div", {"id": "date"}).text[1:]
    # duration of the video
    result["duration"] = soup.find("span", {"class": "ytp-time-duration"}).text
    # video tags, taken from the og:video:tag <meta> elements
    result["tags"] = ", ".join(
        meta.attrs.get("content")
        for meta in soup.find_all("meta", {"property": "og:video:tag"})
    )
    # likes and dislikes: the first matching label is read as the like
    # counter, the second as the dislike counter (from their aria-label)
    toggle_labels = soup.find_all(
        "yt-formatted-string",
        {"id": "text", "class": "ytd-toggle-button-renderer"},
    )
    result["likes"] = _digits_to_int(toggle_labels[0].attrs.get("aria-label"))
    result["dislikes"] = _digits_to_int(toggle_labels[1].attrs.get("aria-label"))

    # channel details
    channel_tag = soup.find(
        "yt-formatted-string", {"class": "ytd-channel-name"}
    ).find("a")
    # channel name
    channel_name = channel_tag.text
    # channel URL (the href on the page is site-relative)
    channel_url = f"https://www.youtube.com{channel_tag['href']}"
    # number of subscribers, kept as str
    channel_subscribers = soup.find(
        "yt-formatted-string", {"id": "owner-sub-count"}
    ).text.strip()
    result["channel"] = {
        "name": channel_name,
        "url": channel_url,
        "subscribers": channel_subscribers,
    }
    return result
3446
@@ -46,10 +58,12 @@ def get_video_info(url):
4658 # print in nice format
4759 print (f"Title: { data ['title' ]} " )
4860 print (f"Views: { data ['views' ]} " )
49- print (f"\n Description: { data ['description' ]} \n " )
50- print (data ['date_published' ])
61+ print (f"Published at: { data ['date_published' ]} " )
62+ print (f"Video Duration: { data ['duration' ]} " )
63+ print (f"Video tags: { data ['tags' ]} " )
5164 print (f"Likes: { data ['likes' ]} " )
5265 print (f"Dislikes: { data ['dislikes' ]} " )
66+ print (f"\n Description: { data ['description' ]} \n " )
5367 print (f"\n Channel Name: { data ['channel' ]['name' ]} " )
5468 print (f"Channel URL: { data ['channel' ]['url' ]} " )
5569 print (f"Channel Subscribers: { data ['channel' ]['subscribers' ]} " )
0 commit comments