|
1 | 1 | # coding: utf-8
|
2 | 2 | from __future__ import unicode_literals
|
3 | 3 |
|
| 4 | +import base64 |
4 | 5 | import re
|
5 | 6 | import json
|
6 | 7 | import zlib
|
|
23 | 24 | bytes_to_intlist,
|
24 | 25 | extract_attributes,
|
25 | 26 | float_or_none,
|
| 27 | + format_field, |
26 | 28 | intlist_to_bytes,
|
27 | 29 | int_or_none,
|
| 30 | + join_nonempty, |
28 | 31 | lowercase_escape,
|
29 | 32 | merge_dicts,
|
30 | 33 | qualities,
|
31 | 34 | remove_end,
|
32 | 35 | sanitized_Request,
|
| 36 | + traverse_obj, |
33 | 37 | try_get,
|
34 | 38 | urlencode_postdata,
|
35 | 39 | xpath_text,
|
@@ -733,13 +737,118 @@ class CrunchyrollBetaIE(CrunchyrollBaseIE):
|
def _real_extract(self, url):
    """Extract a single episode from a beta-site URL.

    When no ``etp_rt`` cookie is available for ``url`` the request is
    redirected to ``CrunchyrollIE`` using the ids found in the page's
    ``__INITIAL_STATE__``. Otherwise the cookie is exchanged for an access
    token, a signed CMS policy is fetched, and episode metadata plus stream
    info are downloaded to build the info dict directly.

    Extractor arguments read via ``_configuration_arg``:
      * ``hardsub`` - hardsub locales to keep (``none`` means no hardsub;
        this is the default).
      * ``format`` - stream types to keep (default ``adaptive_hls``).

    Raises ExtractorError when the episode is premium-only and the episode
    metadata carries no playback URL.
    """
    lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'internal_id', 'id')
    webpage = self._download_webpage(url, display_id)
    initial_state = self._parse_json(
        self._search_regex(r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'initial state'),
        display_id)
    episode_data = initial_state['content']['byId'][internal_id]

    if not self._get_cookies(url).get('etp_rt'):
        # No session cookie: delegate to the non-beta extractor.
        video_id = episode_data['external_id'].split('.')[1]
        series_id = episode_data['episode_metadata']['series_slug_title']
        return self.url_result(f'https://www.crunchyroll.com/{lang}{series_id}/{display_id}-{video_id}',
                               CrunchyrollIE.ie_key(), video_id)

    app_config = self._parse_json(
        self._search_regex(r'__APP_CONFIG__\s*=\s*({.+?})\s*;', webpage, 'app config'),
        display_id)
    api_params = app_config['cxApiParams']
    client_id = api_params['accountAuthClientId']
    api_domain = api_params['apiDomain']

    # HTTP Basic credentials with the client id as user and an empty password.
    basic_token = base64.b64encode(('%s:' % client_id).encode('ascii')).decode('ascii')
    auth_response = self._download_json(
        f'{api_domain}/auth/v1/token', display_id,
        note='Authenticating with cookie',
        headers={
            'Authorization': 'Basic ' + basic_token
        }, data='grant_type=etp_rt_cookie'.encode('ascii'))
    policy_response = self._download_json(
        f'{api_domain}/index/v2', display_id,
        note='Retrieving signed policy',
        headers={
            'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']
        })

    cms = policy_response['cms']
    bucket = cms['bucket']
    params = {
        'Policy': cms['policy'],
        'Signature': cms['signature'],
        'Key-Pair-Id': cms['key_pair_id']
    }
    locale = traverse_obj(initial_state, ('localization', 'locale'))
    if locale:
        params['locale'] = locale

    episode_response = self._download_json(
        f'{api_domain}/cms/v2{bucket}/episodes/{internal_id}', display_id,
        note='Retrieving episode metadata',
        query=params)
    if episode_response.get('is_premium_only') and not episode_response.get('playback'):
        raise ExtractorError('This video is for premium members only.', expected=True)
    stream_response = self._download_json(
        episode_response['playback'], display_id,
        note='Retrieving stream info')

    # 'images' -> 'thumbnail' is a list of lists; tolerate it being absent.
    thumbnails = []
    for thumbnails_data in traverse_obj(episode_response, ('images', 'thumbnail')) or []:
        for thumbnail_data in thumbnails_data:
            thumbnails.append({
                'url': thumbnail_data.get('source'),
                'width': thumbnail_data.get('width'),
                'height': thumbnail_data.get('height'),
            })

    # Use a dedicated loop name so the URL language in `lang` is not clobbered,
    # and tolerate a missing/null 'subtitles' entry.
    subtitles = {}
    for sub_lang, subtitle_data in (stream_response.get('subtitles') or {}).items():
        subtitles[sub_lang] = [{
            'url': subtitle_data.get('url'),
            'ext': subtitle_data.get('format')
        }]

    requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
    # Reversed so that earlier requested locales rank higher.
    hardsub_preference = qualities(requested_hardsubs[::-1])
    requested_formats = self._configuration_arg('format') or ['adaptive_hls']

    formats = []
    for stream_type, streams in (stream_response.get('streams') or {}).items():
        if stream_type not in requested_formats:
            continue
        protocol = stream_type.split('_')[-1]
        for stream in streams.values():
            hardsub_lang = (stream.get('hardsub_locale') or '').lower()
            if hardsub_lang not in requested_hardsubs:
                continue
            if not stream.get('url'):
                continue
            format_id = join_nonempty(
                stream_type,
                format_field(stream, 'hardsub_locale', 'hardsub-%s'))
            if protocol == 'hls':
                adaptive_formats = self._extract_m3u8_formats(
                    stream['url'], display_id, 'mp4', m3u8_id=format_id,
                    note='Downloading %s information' % format_id,
                    fatal=False)
            elif protocol == 'dash':
                adaptive_formats = self._extract_mpd_formats(
                    stream['url'], display_id, mpd_id=format_id,
                    note='Downloading %s information' % format_id,
                    fatal=False)
            else:
                # Unknown protocol: skip rather than reuse (or fail on) the
                # formats left over from a previous iteration.
                continue
            quality = hardsub_preference(hardsub_lang)
            for f in adaptive_formats:
                if f.get('acodec') != 'none':
                    f['language'] = stream_response.get('audio_locale')
                f['quality'] = quality
            formats.extend(adaptive_formats)
    self._sort_formats(formats)

    description = episode_response.get('description')
    if description is not None:
        # The raw literal matches a backslash-escaped "\r\n" sequence in the
        # text, not real CR/LF characters.
        # NOTE(review): presumably the API escapes line breaks this way - confirm.
        description = description.replace(r'\r\n', '\n')

    return {
        'id': internal_id,
        'title': '%s Episode %s – %s' % (episode_response.get('season_title'), episode_response.get('episode'), episode_response.get('title')),
        'description': description,
        'duration': float_or_none(episode_response.get('duration_ms'), 1000),
        'thumbnails': thumbnails,
        'series': episode_response.get('series_title'),
        'series_id': episode_response.get('series_id'),
        'season': episode_response.get('season_title'),
        'season_id': episode_response.get('season_id'),
        'season_number': episode_response.get('season_number'),
        'episode': episode_response.get('title'),
        'episode_number': episode_response.get('sequence_number'),
        'subtitles': subtitles,
        'formats': formats
    }
743 | 852 |
|
744 | 853 |
|
745 | 854 | class CrunchyrollBetaShowIE(CrunchyrollBaseIE):
|
|
0 commit comments