1
1
# coding: utf-8
2
2
from __future__ import unicode_literals
3
3
4
+ import functools
4
5
import re
5
6
6
7
from .common import InfoExtractor
7
- from ..compat import compat_str
8
8
from ..utils import (
9
9
clean_html ,
10
10
determine_ext ,
11
11
float_or_none ,
12
12
HEADRequest ,
13
+ InAdvancePagedList ,
13
14
int_or_none ,
14
15
join_nonempty ,
15
16
orderedSet ,
16
17
remove_end ,
18
+ smuggle_url ,
17
19
str_or_none ,
18
20
strip_jsonp ,
19
21
unescapeHTML ,
20
22
unified_strdate ,
23
+ unsmuggle_url ,
21
24
url_or_none ,
25
+ urljoin ,
22
26
)
23
27
24
28
25
29
class ORFTVthekIE (InfoExtractor ):
26
30
IE_NAME = 'orf:tvthek'
27
31
IE_DESC = 'ORF TVthek'
28
- _VALID_URL = r'https?://tvthek\.orf\.at/(?:[^/]+/)+ (?P<id>\d+)'
32
+ _VALID_URL = r'(?P<url> https?://tvthek\.orf\.at/(?:(?: [^/]+/){2}){1,2} (?P<id>\d+))(/[^/]+/(?P<vid>\d+))?(?:$|[?#] )'
29
33
30
34
_TESTS = [{
35
+ 'url' : 'https://tvthek.orf.at/profile/ZIB-2/1211/ZIB-2/14121079' ,
36
+ 'info_dict' : {
37
+ 'id' : '14121079' ,
38
+ },
39
+ 'playlist_count' : 11 ,
40
+ 'params' : {'noplaylist' : True }
41
+ }, {
42
+ 'url' : 'https://tvthek.orf.at/profile/ZIB-2/1211/ZIB-2/14121079/Umfrage-Welches-Tier-ist-Sebastian-Kurz/15083150' ,
43
+ 'info_dict' : {
44
+ 'id' : '14121079' ,
45
+ },
46
+ 'playlist_count' : 1 ,
47
+ 'params' : {'playlist_items' : '5' }
48
+ }, {
49
+ 'url' : 'https://tvthek.orf.at/profile/ZIB-2/1211/ZIB-2/14121079/Umfrage-Welches-Tier-ist-Sebastian-Kurz/15083150' ,
50
+ 'info_dict' : {
51
+ 'id' : '14121079' ,
52
+ 'playlist_count' : 1
53
+ },
54
+ 'playlist' : [{
55
+ 'info_dict' : {
56
+ 'id' : '15083150' ,
57
+ 'ext' : 'mp4' ,
58
+ 'description' : 'md5:7be1c485425f5f255a5e4e4815e77d04' ,
59
+ 'thumbnail' : 'https://api-tvthek.orf.at/uploads/media/segments/0130/59/824271ea35cd8931a0fb08ab316a5b0a1562342c.jpeg' ,
60
+ 'title' : 'Umfrage: Welches Tier ist Sebastian Kurz?' ,
61
+ }
62
+ }],
63
+ 'playlist_count' : 1 ,
64
+ 'params' : {'noplaylist' : True , 'skip_download' : 'm3u8' }
65
+ }, {
31
66
'url' : 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389' ,
32
67
'playlist' : [{
33
68
'md5' : '2942210346ed779588f428a92db88712' ,
@@ -62,8 +97,90 @@ class ORFTVthekIE(InfoExtractor):
62
97
'only_matching' : True ,
63
98
}]
64
99
100
def _pagefunc(self, url, data_jsb, n, *, image=None):
    """Yield the info dict for the segment at index ``n`` of ``data_jsb``.

    Written as a generator so that it can serve as a page function
    producing a single entry per page.

    Parameters:
        url:      base URL of the episode; ``/part/<segment id>`` is
                  appended to it to build the entry's ``webpage_url``.
        data_jsb: indexable collection of segment dicts. Each dict must
                  provide ``id``, ``title`` and ``sources``; every other
                  key is read with ``.get()`` and is optional.
        n:        index into ``data_jsb`` of the segment to extract.
        image:    keyword-only fallback URL for the ``full`` thumbnail,
                  used when the segment has no ``image_full_url``.

    Yields:
        Exactly one dict with the keys ``id``, ``title``, ``webpage_url``,
        ``formats``, ``subtitles``, ``description``, ``duration``,
        ``upload_date`` and ``thumbnails``.

    Raises:
        KeyError: if the segment lacks ``id``, ``title`` or ``sources``.
        Whatever ``self.raise_geo_restricted()`` raises, when any extracted
        m3u8 format URL contains ``/geoprotection``.
    """
    sd = data_jsb[n]
    video_id, title = str(sd['id']), sd['title']

    formats = []
    for fd in sd['sources']:
        src = url_or_none(fd.get('src'))
        if not src:
            continue
        format_id = join_nonempty('delivery', 'quality', 'quality_string', from_dict=fd)
        ext = determine_ext(src)
        if ext == 'm3u8':
            m3u8_formats = self._extract_m3u8_formats(
                src, video_id, 'mp4', m3u8_id=format_id, fatal=False,
                note=f'Downloading {format_id} m3u8 manifest')
            # A '/geoprotection' path component in a stream URL is treated
            # as the marker of a geo-blocked stream.
            if any('/geoprotection' in f['url'] for f in m3u8_formats):
                self.raise_geo_restricted()
            formats.extend(m3u8_formats)
        elif ext == 'f4m':
            formats.extend(self._extract_f4m_formats(
                src, video_id, f4m_id=format_id, fatal=False))
        elif ext == 'mpd':
            formats.extend(self._extract_mpd_formats(
                src, video_id, mpd_id=format_id, fatal=False,
                note=f'Downloading {format_id} mpd manifest'))
        else:
            formats.append({
                'format_id': format_id,
                'url': src,
                'protocol': fd.get('protocol'),
            })

    # Check for geoblocking.
    # There is a property is_geoprotection, but that's always false
    geo_str = sd.get('geoprotection_string')
    if geo_str:
        # Probe the first plain http(s) .mp4 format, if there is one.
        http_url = next(
            (f['url'] for f in formats if re.match(r'^https?://.*\.mp4$', f['url'])),
            None)
        if http_url:
            # fatal=False is passed, so a failed probe is presumably only
            # reported through errnote rather than aborting (confirm
            # against _request_webpage).
            self._request_webpage(
                HEADRequest(http_url), video_id, fatal=False,
                note='Testing for geoblocking',
                errnote=(
                    f'This video seems to be blocked outside of {geo_str}. '
                    'You may want to try the streaming-* formats'))

    self._sort_formats(formats)

    subtitles = {}
    # `or []` also covers a 'subtitles' key that is present but null,
    # which a plain .get() default would not.
    for sub in sd.get('subtitles') or []:
        sub_src = sub.get('src')
        if not sub_src:
            continue
        # Subtitle tracks without a language tag are filed under 'de-AT'.
        subtitles.setdefault(sub.get('lang', 'de-AT'), []).append({
            'url': sub_src,
        })

    upload_date = unified_strdate(sd.get('created_date'))

    thumbnails = []
    preview = sd.get('preview_image_url')
    if preview:
        thumbnails.append({
            'id': 'preview',
            'url': preview,
            'preference': 0,
        })
    # The segment's own full-size image wins over the caller's fallback.
    image = sd.get('image_full_url') or image
    if image:
        thumbnails.append({
            'id': 'full',
            'url': image,
            'preference': 1,
        })

    yield {
        'id': video_id,
        'title': title,
        # No whitespace may leak into the URL. The smuggled flag is
        # presumably read back by _real_extract via unsmuggle_url to force
        # single-video extraction (confirm against the caller).
        'webpage_url': smuggle_url(f'{url}/part/{video_id}', {'force_noplaylist': True}),
        'formats': formats,
        'subtitles': subtitles,
        'description': sd.get('description'),
        'duration': int_or_none(sd.get('duration_in_seconds')),
        'upload_date': upload_date,
        'thumbnails': thumbnails,
    }
180
+
65
181
def _real_extract (self , url ):
66
- playlist_id = self ._match_id (url )
182
+ url , smuggled_data = unsmuggle_url (url )
183
+ playlist_id , video_id , base_url = self ._match_valid_url (url ).group ('id' , 'vid' , 'url' )
67
184
webpage = self ._download_webpage (url , playlist_id )
68
185
69
186
data_jsb = self ._parse_json (
@@ -72,107 +189,16 @@ def _real_extract(self, url):
72
189
webpage , 'playlist' , group = 'json' ),
73
190
playlist_id , transform_source = unescapeHTML )['playlist' ]['videos' ]
74
191
75
- entries = []
76
- for sd in data_jsb :
77
- video_id , title = sd .get ('id' ), sd .get ('title' )
78
- if not video_id or not title :
79
- continue
80
- video_id = compat_str (video_id )
81
- formats = []
82
- for fd in sd ['sources' ]:
83
- src = url_or_none (fd .get ('src' ))
84
- if not src :
85
- continue
86
- format_id = join_nonempty ('delivery' , 'quality' , 'quality_string' , from_dict = fd )
87
- ext = determine_ext (src )
88
- if ext == 'm3u8' :
89
- m3u8_formats = self ._extract_m3u8_formats (
90
- src , video_id , 'mp4' , m3u8_id = format_id , fatal = False )
91
- if any ('/geoprotection' in f ['url' ] for f in m3u8_formats ):
92
- self .raise_geo_restricted ()
93
- formats .extend (m3u8_formats )
94
- elif ext == 'f4m' :
95
- formats .extend (self ._extract_f4m_formats (
96
- src , video_id , f4m_id = format_id , fatal = False ))
97
- elif ext == 'mpd' :
98
- formats .extend (self ._extract_mpd_formats (
99
- src , video_id , mpd_id = format_id , fatal = False ))
100
- else :
101
- formats .append ({
102
- 'format_id' : format_id ,
103
- 'url' : src ,
104
- 'protocol' : fd .get ('protocol' ),
105
- })
192
+ if not self ._yes_playlist (playlist_id , video_id , smuggled_data ):
193
+ data_jsb = [sd for sd in data_jsb if str (sd .get ('id' )) == video_id ]
106
194
107
- # Check for geoblocking.
108
- # There is a property is_geoprotection, but that's always false
109
- geo_str = sd .get ('geoprotection_string' )
110
- if geo_str :
111
- try :
112
- http_url = next (
113
- f ['url' ]
114
- for f in formats
115
- if re .match (r'^https?://.*\.mp4$' , f ['url' ]))
116
- except StopIteration :
117
- pass
118
- else :
119
- req = HEADRequest (http_url )
120
- self ._request_webpage (
121
- req , video_id ,
122
- note = 'Testing for geoblocking' ,
123
- errnote = ((
124
- 'This video seems to be blocked outside of %s. '
125
- 'You may want to try the streaming-* formats.' )
126
- % geo_str ),
127
- fatal = False )
128
-
129
- self ._check_formats (formats , video_id )
130
- self ._sort_formats (formats )
131
-
132
- subtitles = {}
133
- for sub in sd .get ('subtitles' , []):
134
- sub_src = sub .get ('src' )
135
- if not sub_src :
136
- continue
137
- subtitles .setdefault (sub .get ('lang' , 'de-AT' ), []).append ({
138
- 'url' : sub_src ,
139
- })
140
-
141
- upload_date = unified_strdate (sd .get ('created_date' ))
142
-
143
- thumbnails = []
144
- preview = sd .get ('preview_image_url' )
145
- if preview :
146
- thumbnails .append ({
147
- 'id' : 'preview' ,
148
- 'url' : preview ,
149
- 'preference' : 0 ,
150
- })
151
- image = sd .get ('image_full_url' )
152
- if not image and len (data_jsb ) == 1 :
153
- image = self ._og_search_thumbnail (webpage )
154
- if image :
155
- thumbnails .append ({
156
- 'id' : 'full' ,
157
- 'url' : image ,
158
- 'preference' : 1 ,
159
- })
160
-
161
- entries .append ({
162
- '_type' : 'video' ,
163
- 'id' : video_id ,
164
- 'title' : title ,
165
- 'formats' : formats ,
166
- 'subtitles' : subtitles ,
167
- 'description' : sd .get ('description' ),
168
- 'duration' : int_or_none (sd .get ('duration_in_seconds' )),
169
- 'upload_date' : upload_date ,
170
- 'thumbnails' : thumbnails ,
171
- })
195
+ playlist_count = len (data_jsb )
196
+ image = self ._og_search_thumbnail (webpage ) if playlist_count == 1 else None
172
197
198
+ page_func = functools .partial (self ._pagefunc , base_url , data_jsb , image = image )
173
199
return {
174
200
'_type' : 'playlist' ,
175
- 'entries' : entries ,
201
+ 'entries' : InAdvancePagedList ( page_func , playlist_count , 1 ) ,
176
202
'id' : playlist_id ,
177
203
}
178
204
0 commit comments