Skip to content

Commit fb62afd

Browse files
authored
[Musicdex] Add extractors (yt-dlp#2421)
Closes yt-dlp#2204 Authored by: Ashish0804
1 parent 50600e8 commit fb62afd

File tree

2 files changed

+181
-0
lines changed

2 files changed

+181
-0
lines changed

yt_dlp/extractor/extractors.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -881,6 +881,12 @@
881881
)
882882
from .muenchentv import MuenchenTVIE
883883
from .musescore import MuseScoreIE
884+
from .musicdex import (
885+
MusicdexSongIE,
886+
MusicdexAlbumIE,
887+
MusicdexArtistIE,
888+
MusicdexPlaylistIE,
889+
)
884890
from .mwave import MwaveIE, MwaveMeetGreetIE
885891
from .mxplayer import (
886892
MxplayerIE,

yt_dlp/extractor/musicdex.py

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
# coding: utf-8
2+
from __future__ import unicode_literals
3+
4+
from .common import InfoExtractor
5+
from ..utils import (
6+
date_from_str,
7+
format_field,
8+
try_get,
9+
unified_strdate,
10+
)
11+
12+
13+
class MusicdexBaseIE(InfoExtractor):
    """Common base for the musicdex.org extractors."""

    def _return_info(self, track_json, album_json, id):
        """Build the info dict for one track.

        Args:
            track_json: Track object (dict) taken from one of the site's
                JSON responses.
            album_json: Album object (dict) used for the album-level fields;
                callers pass an empty dict when no album data is available.
            id: Track identifier; stringified for the ``id`` field.

        Returns:
            An info dict for the track. It is always attributed to
            ``MusicdexSongIE``, regardless of which extractor produced it.
        """
        return {
            'id': str(id),
            'title': track_json.get('name'),
            'track': track_json.get('name'),
            'description': track_json.get('description'),
            'track_number': track_json.get('number'),
            'url': format_field(track_json, 'url', 'https://www.musicdex.org/%s'),
            # The site reports the track length in milliseconds (the sample
            # track below yields 266000), whereas the info-dict `duration`
            # field is expressed in seconds. try_get swallows the
            # KeyError/TypeError raised for a missing or non-numeric value,
            # leaving the field as None in that case.
            'duration': try_get(track_json, lambda x: x['duration'] / 1000),
            'genre': [genre.get('name') for genre in track_json.get('genres') or []],
            'like_count': track_json.get('likes_count'),
            'view_count': track_json.get('plays'),
            'artist': [artist.get('name') for artist in track_json.get('artists') or []],
            'album_artist': [artist.get('name') for artist in album_json.get('artists') or []],
            'thumbnail': format_field(album_json, 'image', 'https://www.musicdex.org/%s'),
            'album': album_json.get('name'),
            'release_year': try_get(album_json, lambda x: date_from_str(unified_strdate(x['release_date'])).year),
            'extractor_key': MusicdexSongIE.ie_key(),
            'extractor': 'MusicdexSong',
        }


class MusicdexSongIE(MusicdexBaseIE):
    """Extractor for single track pages (``musicdex.org/track/<id>``)."""
    _VALID_URL = r'https?://(?:www\.)?musicdex\.org/track/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://www.musicdex.org/track/306/dual-existence',
        'info_dict': {
            'id': '306',
            'ext': 'mp3',
            'title': 'dual existence',
            'description': '#NIPPONSEI @ IRC.RIZON.NET',
            'track': 'dual existence',
            'track_number': 1,
            # Seconds (the site's 266000 ms, scaled by _return_info).
            'duration': 266,
            'genre': ['Anime'],
            'like_count': int,
            'view_count': int,
            'artist': ['fripSide'],
            'album_artist': ['fripSide'],
            'thumbnail': 'https://www.musicdex.org/storage/album/9iDIam1DHTVqUG4UclFIEq1WAFGXfPW4y0TtZa91.png',
            'album': 'To Aru Kagaku no Railgun T OP2 Single - dual existence',
            'release_year': 2020,
        },
        'params': {'skip_download': True},
    }]

    def _real_extract(self, url):
        """Fetch the track's JSON record and convert it to an info dict."""
        track_id = self._match_id(url)
        track = self._download_json(
            f'https://www.musicdex.org/secure/tracks/{track_id}?defaultRelations=true', track_id)['track']
        # The track record may carry its album inline; fall back to an empty
        # dict so the album-level lookups simply come back empty.
        return self._return_info(track, track.get('album') or {}, track_id)
65+
66+
67+
class MusicdexAlbumIE(MusicdexBaseIE):
    """Extractor for album pages; the album is returned as a playlist of its tracks."""
    _VALID_URL = r'https?://(?:www\.)?musicdex\.org/album/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://www.musicdex.org/album/56/tenmon-and-eiichiro-yanagi-minori/ef-a-tale-of-memories-original-soundtrack-2-fortissimo',
        'playlist_mincount': 28,
        'info_dict': {
            'id': '56',
            'genre': ['OST'],
            'view_count': int,
            'artist': ['TENMON & Eiichiro Yanagi / minori'],
            'title': 'ef - a tale of memories Original Soundtrack 2 ~fortissimo~',
            'release_year': 2008,
            'thumbnail': 'https://www.musicdex.org/storage/album/2rSHkyYBYfB7sbvElpEyTMcUn6toY7AohOgJuDlE.jpg',
        },
    }]

    def _real_extract(self, url):
        """Download the album record and wrap its tracks in a playlist dict."""
        album_id = self._match_id(url)
        response = self._download_json(
            f'https://www.musicdex.org/secure/albums/{album_id}?defaultRelations=true', album_id)
        album = response['album']

        # Tracks without a (truthy) id are skipped.
        entries = []
        for track in album.get('tracks') or []:
            if not track.get('id'):
                continue
            entries.append(self._return_info(track, album, track['id']))

        def names_of(key):
            # Collect the 'name' of every object listed under `key`.
            return [item.get('name') for item in album.get(key) or []]

        return {
            '_type': 'playlist',
            'id': album_id,
            'title': album.get('name'),
            'description': album.get('description'),
            'genre': names_of('genres'),
            'view_count': album.get('plays'),
            'artist': names_of('artists'),
            'thumbnail': format_field(album, 'image', 'https://www.musicdex.org/%s'),
            'release_year': try_get(album, lambda x: date_from_str(unified_strdate(x['release_date'])).year),
            'entries': entries,
        }
101+
102+
103+
class MusicdexPageIE(MusicdexBaseIE):
    """Base for extractors that read a paginated listing.

    Subclasses provide ``_API_URL``, a %-format template that takes the
    resource id and yields the URL of the first page.
    """

    def _entries(self, id):
        """Yield every item from each page's ``data`` list, following ``next_page_url``."""
        page_url = self._API_URL % id
        while page_url:
            pagination = self._download_json(page_url, id)['pagination']
            yield from pagination.get('data') or []
            # A missing or empty next_page_url ends the loop.
            page_url = pagination.get('next_page_url')
111+
112+
113+
class MusicdexArtistIE(MusicdexPageIE):
    """Extractor for artist pages; returns the tracks of all listed albums as one playlist."""
    _VALID_URL = r'https?://(?:www\.)?musicdex\.org/artist/(?P<id>\d+)'
    _API_URL = 'https://www.musicdex.org/secure/artists/%s/albums?page=1'

    _TESTS = [{
        'url': 'https://www.musicdex.org/artist/11/fripside',
        'playlist_mincount': 28,
        'info_dict': {
            'id': '11',
            'view_count': int,
            'title': 'fripSide',
            'thumbnail': 'https://www.musicdex.org/storage/artist/ZmOz0lN2vsweegB660em3xWffCjLPmTQHqJls5Xx.jpg',
        },
    }]

    def _real_extract(self, url):
        """Download the artist record, then flatten the paginated albums into track entries."""
        artist_id = self._match_id(url)
        artist = self._download_json(
            f'https://www.musicdex.org/secure/artists/{artist_id}', artist_id)['artist']

        # One entry per track, album by album; tracks without a (truthy) id
        # are skipped.
        entries = [
            self._return_info(track, album, track['id'])
            for album in self._entries(artist_id)
            for track in album.get('tracks') or []
            if track.get('id')
        ]

        return {
            '_type': 'playlist',
            'id': artist_id,
            'title': artist.get('name'),
            'view_count': artist.get('plays'),
            'thumbnail': format_field(artist, 'image_small', 'https://www.musicdex.org/%s'),
            'entries': entries,
        }
143+
144+
145+
class MusicdexPlaylistIE(MusicdexPageIE):
    """Extractor for user playlists (``musicdex.org/playlist/<id>``)."""
    _VALID_URL = r'https?://(?:www\.)?musicdex\.org/playlist/(?P<id>\d+)'
    _API_URL = 'https://www.musicdex.org/secure/playlists/%s/tracks?perPage=10000&page=1'

    _TESTS = [{
        'url': 'https://www.musicdex.org/playlist/9/test',
        'playlist_mincount': 73,
        'info_dict': {
            'id': '9',
            'view_count': int,
            'title': 'Test',
            'thumbnail': 'https://www.musicdex.org/storage/album/jXATI79f0IbQ2sgsKYOYRCW3zRwF3XsfHhzITCuJ.jpg',
            'description': 'Test 123 123 21312 32121321321321312',
        },
    }]

    def _real_extract(self, url):
        """Download the playlist record and turn its paginated tracks into entries."""
        playlist_id = self._match_id(url)
        playlist = self._download_json(
            f'https://www.musicdex.org/secure/playlists/{playlist_id}', playlist_id)['playlist']

        # _entries() is a generator function, so its result is always truthy
        # and needs no `or []` fallback. Tracks without a (truthy) id are
        # skipped; each track's inline album (if any) supplies the
        # album-level fields.
        entries = [
            self._return_info(track, track.get('album') or {}, track['id'])
            for track in self._entries(playlist_id)
            if track.get('id')
        ]

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist.get('name'),
            'description': playlist.get('description'),
            'view_count': playlist.get('plays'),
            'thumbnail': format_field(playlist, 'image', 'https://www.musicdex.org/%s'),
            'entries': entries,
        }

0 commit comments

Comments
 (0)