Skip to content

Commit 706dfe4

Browse files
authored
[crunchyroll:beta] Add cookies support (yt-dlp#2506)
* Extract directly from the beta API when cookies are passed. If login cookie is absent, the extraction is delegated to `CrunchyrollIE`. This causes different metadata to be extracted (including formats and video id) and therefore results in a different archive entry. For now, this issue is unavoidable since the browser also redirects to the old site when not logged in. * Adds extractor-args `format` and `hardsub` to control the source and subtitles of the extracted formats Closes yt-dlp#1911 Authored by: tejing1
1 parent c4da5ff commit 706dfe4

File tree

2 files changed

+121
-7
lines changed

2 files changed

+121
-7
lines changed

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1670,6 +1670,11 @@ The following extractors use this feature:
16701670
* `language`: Languages to extract. Eg: `crunchyroll:language=jaJp`
16711671
* `hardsub`: Which hard-sub versions to extract. Eg: `crunchyroll:hardsub=None,enUS`
16721672

1673+
#### crunchyroll:beta
1674+
* `format`: Which stream type(s) to extract. Default is `adaptive_hls`. Eg: `crunchyrollbeta:format=vo_adaptive_hls`
1675+
* Potentially useful values include `adaptive_hls`, `adaptive_dash`, `vo_adaptive_hls`, `vo_adaptive_dash`, `download_hls`, `trailer_hls`, `trailer_dash`
1676+
* `hardsub`: Preference order for which hardsub versions to extract. Default is `None` (no hardsubs). Eg: `crunchyrollbeta:hardsub=en-US,None`
1677+
16731678
#### vikichannel
16741679
* `video_types`: Types of videos to download - one or more of `episodes`, `movies`, `clips`, `trailers`
16751680

yt_dlp/extractor/crunchyroll.py

Lines changed: 116 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# coding: utf-8
22
from __future__ import unicode_literals
33

4+
import base64
45
import re
56
import json
67
import zlib
@@ -23,13 +24,16 @@
2324
bytes_to_intlist,
2425
extract_attributes,
2526
float_or_none,
27+
format_field,
2628
intlist_to_bytes,
2729
int_or_none,
30+
join_nonempty,
2831
lowercase_escape,
2932
merge_dicts,
3033
qualities,
3134
remove_end,
3235
sanitized_Request,
36+
traverse_obj,
3337
try_get,
3438
urlencode_postdata,
3539
xpath_text,
@@ -733,13 +737,118 @@ class CrunchyrollBetaIE(CrunchyrollBaseIE):
733737
def _real_extract(self, url):
    # Extract a Crunchyroll beta episode.  When the `etp_rt` login cookie is
    # absent the beta site redirects to the old site, so extraction is
    # delegated to CrunchyrollIE; otherwise the beta API is queried directly.
    lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'internal_id', 'id')
    webpage = self._download_webpage(url, display_id)
    initial_state = self._parse_json(
        self._search_regex(r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'initial state'),
        display_id)
    episode_data = initial_state['content']['byId'][internal_id]
    if not self._get_cookies(url).get('etp_rt'):
        # Not logged in: fall back to the old-site extractor.  NOTE(review):
        # this yields different metadata (including a different video id and
        # therefore archive entry) than the logged-in path.
        video_id = episode_data['external_id'].split('.')[1]
        series_id = episode_data['episode_metadata']['series_slug_title']
        return self.url_result(f'https://www.crunchyroll.com/{lang}{series_id}/{display_id}-{video_id}',
                               CrunchyrollIE.ie_key(), video_id)

    app_config = self._parse_json(
        self._search_regex(r'__APP_CONFIG__\s*=\s*({.+?})\s*;', webpage, 'app config'),
        display_id)
    client_id = app_config['cxApiParams']['accountAuthClientId']
    api_domain = app_config['cxApiParams']['apiDomain']
    # HTTP Basic credentials: client id with an empty secret (RFC 7617 form)
    basic_token = str(base64.b64encode(('%s:' % client_id).encode('ascii')), 'ascii')
    auth_response = self._download_json(
        f'{api_domain}/auth/v1/token', display_id,
        note='Authenticating with cookie',
        headers={
            'Authorization': 'Basic ' + basic_token
        }, data='grant_type=etp_rt_cookie'.encode('ascii'))
    policy_response = self._download_json(
        f'{api_domain}/index/v2', display_id,
        note='Retrieving signed policy',
        headers={
            'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']
        })
    bucket = policy_response['cms']['bucket']
    params = {
        'Policy': policy_response['cms']['policy'],
        'Signature': policy_response['cms']['signature'],
        'Key-Pair-Id': policy_response['cms']['key_pair_id']
    }
    locale = traverse_obj(initial_state, ('localization', 'locale'))
    if locale:
        params['locale'] = locale
    episode_response = self._download_json(
        f'{api_domain}/cms/v2{bucket}/episodes/{internal_id}', display_id,
        note='Retrieving episode metadata',
        query=params)
    if episode_response.get('is_premium_only') and not episode_response.get('playback'):
        raise ExtractorError('This video is for premium members only.', expected=True)
    stream_response = self._download_json(
        episode_response['playback'], display_id,
        note='Retrieving stream info')

    thumbnails = []
    # `or []` guards against a missing/None images.thumbnail field
    for thumbnails_data in traverse_obj(episode_response, ('images', 'thumbnail')) or []:
        for thumbnail_data in thumbnails_data:
            thumbnails.append({
                'url': thumbnail_data.get('source'),
                'width': thumbnail_data.get('width'),
                'height': thumbnail_data.get('height'),
            })
    subtitles = {}
    # `or {}` guards against absent subtitles; use `sub_lang` so the URL
    # `lang` group captured above is not shadowed
    for sub_lang, subtitle_data in (stream_response.get('subtitles') or {}).items():
        subtitles[sub_lang] = [{
            'url': subtitle_data.get('url'),
            'ext': subtitle_data.get('format')
        }]

    # The API represents "no hardsubs" as '' in hardsub_locale, while the
    # extractor-arg spelling is 'none'
    requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
    hardsub_preference = qualities(requested_hardsubs[::-1])
    requested_formats = self._configuration_arg('format') or ['adaptive_hls']

    formats = []
    for stream_type, streams in stream_response.get('streams', {}).items():
        if stream_type not in requested_formats:
            continue
        for stream in streams.values():
            hardsub_lang = stream.get('hardsub_locale') or ''
            if hardsub_lang.lower() not in requested_hardsubs:
                continue
            format_id = join_nonempty(
                stream_type,
                format_field(stream, 'hardsub_locale', 'hardsub-%s'))
            if not stream.get('url'):
                continue
            if stream_type.split('_')[-1] == 'hls':
                adaptive_formats = self._extract_m3u8_formats(
                    stream['url'], display_id, 'mp4', m3u8_id=format_id,
                    note='Downloading %s information' % format_id,
                    fatal=False)
            elif stream_type.split('_')[-1] == 'dash':
                adaptive_formats = self._extract_mpd_formats(
                    stream['url'], display_id, mpd_id=format_id,
                    note='Downloading %s information' % format_id,
                    fatal=False)
            else:
                # Unknown container suffix: skip instead of crashing on an
                # unbound `adaptive_formats`
                continue
            for f in adaptive_formats:
                if f.get('acodec') != 'none':
                    f['language'] = stream_response.get('audio_locale')
                f['quality'] = hardsub_preference(hardsub_lang.lower())
            formats.extend(adaptive_formats)
    self._sort_formats(formats)

    return {
        'id': internal_id,
        'title': '%s Episode %s – %s' % (episode_response.get('season_title'), episode_response.get('episode'), episode_response.get('title')),
        # description may be missing; when present it embeds literal \r\n
        'description': try_get(episode_response, lambda x: x['description'].replace(r'\r\n', '\n')),
        'duration': float_or_none(episode_response.get('duration_ms'), 1000),
        'thumbnails': thumbnails,
        'series': episode_response.get('series_title'),
        'series_id': episode_response.get('series_id'),
        'season': episode_response.get('season_title'),
        'season_id': episode_response.get('season_id'),
        'season_number': episode_response.get('season_number'),
        'episode': episode_response.get('title'),
        'episode_number': episode_response.get('sequence_number'),
        'subtitles': subtitles,
        'formats': formats
    }
743852

744853

745854
class CrunchyrollBetaShowIE(CrunchyrollBaseIE):

0 commit comments

Comments
 (0)