Skip to content

Commit 2b5f9cf

Browse files
s1as3r, Silverarmor, and xnetcat
authored
Multiple Lyrics Providers (spotDL#1397)
Co-authored-by: Silverarmor <[email protected]> Co-authored-by: Jakub Kot <[email protected]>
1 parent 94ac18c commit 2b5f9cf

17 files changed

+43958
-59214
lines changed

spotdl/console/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ def graceful_exit(signal, frame):
8383
arguments.output_format,
8484
arguments.use_youtube,
8585
arguments.generate_m3u,
86+
arguments.lyrics_provider,
8687
arguments.search_threads,
8788
)
8889

spotdl/download/embed_metadata.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,9 @@ def _embed_mp3_cover(audio_file, song_object, converted_file_path):
153153
def _embed_mp3_lyrics(audio_file, song_object):
154154
# ! setting the lyrics
155155
lyrics = song_object.lyrics
156+
if not lyrics:
157+
return audio_file
158+
156159
USLTOutput = USLT(encoding=3, lang=u"eng", desc=u"desc", text=lyrics)
157160
audio_file["USLT::'eng'"] = USLTOutput
158161

spotdl/parsers/argument_parser.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,15 @@ def parse_arguments():
113113
"--use-youtube", help="Use youtube instead of YTM", action="store_true"
114114
)
115115

116+
# Option to select a lyrics provider
117+
parser.add_argument(
118+
"--lyrics-provider",
119+
help="Select a lyrics provider",
120+
type=str,
121+
choices=["genius", "musixmatch"],
122+
default="musixmatch",
123+
)
124+
116125
# Option to specify path to local ffmpeg
117126
parser.add_argument("-f", "--ffmpeg", help="Path to ffmpeg", dest="ffmpeg")
118127

spotdl/parsers/query_parser.py

Lines changed: 30 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22
from pathlib import Path
33

44
from spotdl.search import SongObject, song_gatherer
5-
from spotdl.providers import provider_utils, metadata_provider
5+
from spotdl.providers import lyrics_providers, metadata_provider
66

77

88
def parse_query(
9-
query: List[str], format, use_youtube, generate_m3u, threads
9+
query: List[str], format, use_youtube, generate_m3u, lyrics_provider, threads
1010
) -> List[SongObject]:
1111
"""
1212
Parse query and return list containing song object
@@ -20,7 +20,9 @@ def parse_query(
2020
continue
2121

2222
songs_list.extend(
23-
parse_request(request, format, use_youtube, generate_m3u, threads)
23+
parse_request(
24+
request, format, use_youtube, generate_m3u, lyrics_provider, threads
25+
)
2426
)
2527

2628
# linefeed to visually separate output for each query
@@ -42,6 +44,7 @@ def parse_request(
4244
output_format: str = None,
4345
use_youtube: bool = False,
4446
generate_m3u: bool = False,
47+
lyrics_provider: str = None,
4548
threads: int = 1,
4649
) -> List[SongObject]:
4750
song_list: List[SongObject] = []
@@ -59,39 +62,47 @@ def parse_request(
5962
print("Fetching YouTube video with spotify metadata")
6063
song_list = [
6164
song
62-
for song in [get_youtube_meta_track(urls[0], urls[1], output_format)]
65+
for song in [
66+
get_youtube_meta_track(
67+
urls[0], urls[1], output_format, lyrics_provider
68+
)
69+
]
6370
if song is not None
6471
]
6572
elif "open.spotify.com" in request and "track" in request:
6673
print("Fetching Song...")
67-
song = song_gatherer.from_spotify_url(request, output_format, use_youtube)
74+
song = song_gatherer.from_spotify_url(
75+
request, output_format, use_youtube, lyrics_provider
76+
)
6877
try:
6978
song_list = [song] if song.youtube_link is not None else []
7079
except (OSError, ValueError, LookupError):
7180
song_list = []
7281
elif "open.spotify.com" in request and "album" in request:
7382
print("Fetching Album...")
7483
song_list = song_gatherer.from_album(
75-
request, output_format, use_youtube, generate_m3u, threads
84+
request, output_format, use_youtube, lyrics_provider, generate_m3u, threads
7685
)
7786
elif "open.spotify.com" in request and "playlist" in request:
7887
print("Fetching Playlist...")
7988
song_list = song_gatherer.from_playlist(
80-
request, output_format, use_youtube, generate_m3u, threads
89+
request, output_format, use_youtube, lyrics_provider, generate_m3u, threads
8190
)
8291
elif "open.spotify.com" in request and "artist" in request:
8392
print("Fetching artist...")
8493
song_list = song_gatherer.from_artist(
85-
request, output_format, use_youtube, threads
94+
request, output_format, use_youtube, lyrics_provider, threads
8695
)
8796
elif request == "saved":
8897
print("Fetching Saved Songs...")
89-
song_list = song_gatherer.from_saved_tracks(output_format, use_youtube, threads)
98+
song_list = song_gatherer.from_saved_tracks(
99+
output_format, use_youtube, lyrics_provider, threads
100+
)
90101
else:
91102
print('Searching Spotify for song named "%s"...' % request)
92103
try:
93104
song_list = song_gatherer.from_search_term(
94-
request, output_format, use_youtube
105+
request, output_format, use_youtube, lyrics_provider
95106
)
96107
except Exception as e:
97108
print(e)
@@ -103,7 +114,10 @@ def parse_request(
103114

104115

105116
def get_youtube_meta_track(
106-
youtube_url: str, spotify_url: str, output_format: str = None
117+
youtube_url: str,
118+
spotify_url: str,
119+
output_format: str = None,
120+
lyrics_provider: str = None,
107121
):
108122
# check if URL is a playlist, user, artist or album, if yes raise an Exception,
109123
# else proceed
@@ -129,8 +143,11 @@ def get_youtube_meta_track(
129143
print(f'Skipping "{converted_file_name}" as it\'s already downloaded')
130144
return None
131145

132-
# (try to) Get lyrics from Genius
133-
lyrics = provider_utils._get_song_lyrics(song_name, contributing_artist)
146+
# (try to) Get lyrics from musixmatch/genius
147+
if lyrics_provider == "genius":
148+
lyrics = lyrics_providers.get_lyrics_genius(song_name, contributing_artist)
149+
elif lyrics_provider == "musixmatch":
150+
lyrics = lyrics_providers.get_lyrics_musixmatch(song_name, contributing_artist)
134151

135152
return SongObject(
136153
raw_track_meta, raw_album_meta, raw_artist_meta, youtube_url, lyrics

spotdl/providers/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
from spotdl.providers.metadata_provider import from_url
22
from spotdl.providers import ytm_provider as youtube_music
3+
from spotdl.providers import lyrics_providers
34
from spotdl.providers.provider_utils import (
45
_create_song_title,
5-
_get_song_lyrics,
66
_match_percentage,
77
_parse_duration,
88
)

spotdl/providers/lyrics_providers.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
from requests import get
2+
from bs4 import BeautifulSoup
3+
from typing import List
4+
from urllib.parse import quote
5+
6+
# Browser-like User-Agent header sent with the lyrics requests in this module.
# The value is assembled from adjacent string literals instead of a backslash
# continuation inside the literal, so the indentation of the continuation line
# can never end up embedded in the header value.
user_agent = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36"
    )
}
10+
11+
12+
def get_lyrics_genius(song_name: str, song_artists: List[str]) -> str:
    """
    Fetch the lyrics of a song from Genius.

    `str` `song_name` : name of song

    `list<str>` `song_artists` : list containing name of contributing artists

    RETURNS `str`: Lyrics of the song, or an empty string when the search,
    the song lookup or the lyrics page does not yield a usable result
    (non-OK HTTP status, undecodable JSON, or a response missing the
    expected keys).

    The lookup takes three requests: an API search for the song id, an API
    call resolving that id to the song page URL, and a fetch of the page
    itself, from which the lyrics are scraped.
    """

    # NOTE(review): the API token is hard-coded; consider moving it to configuration.
    headers = {
        "Authorization": "Bearer alXXDbPZtK1m2RrZ8I4k2Hn8Ahsd0Gh_o076HYvcdlBvmc0ULL1H8Z8xRlew5qaG",
    }
    headers.update(user_agent)

    # artists whose name already appears in the song name are left out of the query
    artist_str = ", ".join(
        artist for artist in song_artists if artist.lower() not in song_name.lower()
    )
    search_query = f"{song_name} {artist_str}"

    api_search_url = "https://api.genius.com/search"
    api_response = get(api_search_url, params={"q": search_query}, headers=headers)
    if not api_response.ok:
        return ""

    # the first hit is taken as the match; a malformed or empty payload means "no lyrics"
    try:
        song_id = api_response.json()["response"]["hits"][0]["result"]["id"]
    except (IndexError, KeyError, TypeError, ValueError):
        return ""

    song_api_url = f"https://api.genius.com/songs/{song_id}"
    api_response = get(song_api_url, headers=headers)
    if not api_response.ok:
        return ""

    # guarded like the search lookup above so an unexpected payload yields ""
    # instead of an exception
    try:
        song_url = api_response.json()["response"]["song"]["url"]
    except (KeyError, TypeError, ValueError):
        return ""
    if not song_url:
        return ""

    genius_page = get(song_url, headers=user_agent)
    if not genius_page.ok:
        return ""

    # turn <br/> tags into newlines before parsing so line breaks survive get_text()
    soup = BeautifulSoup(genius_page.text.replace("<br/>", "\n"), "html.parser")

    # two page markups are handled: a single `div.lyrics` ...
    lyrics_div = soup.select_one("div.lyrics")
    if lyrics_div is not None:
        return lyrics_div.get_text().strip()

    # ... or one or more divs whose class starts with `Lyrics__Container`
    lyrics_containers = soup.select("div[class^=Lyrics__Container]")
    lyrics = "\n".join(con.get_text() for con in lyrics_containers)
    return lyrics.strip()
60+
61+
62+
def get_lyrics_musixmatch(
    song_name: str, song_artists: List[str], track_search=False
) -> str:
    """
    Fetch the lyrics of a song by scraping musixmatch.

    `str` `song_name` : Name of song

    `list<str>` `song_artists` : list containing name of contributing artists

    `bool` `track_search` : if `True`, only the musixmatch tracks page is
    searched; otherwise the general results page is searched first and the
    tracks page is used as a fallback when it has no lyrics link.

    RETURNS `str`: Lyrics of the song, or an empty string when a request
    fails or no lyrics link is found.
    """

    # Leave out artist names that already appear in the song name.
    # SongObject.create_file_name is not used here because it
    # removes '/' etc. from the artist and song names.
    remaining_artists = [
        artist for artist in song_artists if artist.lower() not in song_name.lower()
    ]
    artists_str = ", ".join(remaining_artists)

    # percent-encode the whole query so it is safe as a URL path segment
    # e.g "Au/Ra" -> "Au%2FRa"
    encoded_query = quote(f"{song_name} - {artists_str}", safe="")

    # pages to try, in order: False = all-results page, True = tracks page
    pages_to_try = (True,) if track_search else (False, True)

    song_url_tag = None
    for use_tracks_page in pages_to_try:
        if use_tracks_page:
            search_path = encoded_query + "/tracks"
        else:
            search_path = encoded_query

        search_resp = get(
            f"https://www.musixmatch.com/search/{search_path}", headers=user_agent
        )
        if not search_resp.ok:
            return ""

        search_soup = BeautifulSoup(search_resp.text, "html.parser")
        song_url_tag = search_soup.select_one("a[href^='/lyrics/']")

        # a lyrics link was found, no need to look at the next page
        if song_url_tag is not None:
            break

    # neither searched page contained a link to a lyrics page
    if song_url_tag is None:
        return ""

    lyrics_resp = get(
        "https://www.musixmatch.com" + song_url_tag.get("href"), headers=user_agent
    )
    if not lyrics_resp.ok:
        return ""

    lyrics_soup = BeautifulSoup(lyrics_resp.text, "html.parser")
    paragraphs = lyrics_soup.select("p.mxm-lyrics__content")
    return "\n".join(paragraph.get_text() for paragraph in paragraphs)

spotdl/providers/provider_utils.py

Lines changed: 2 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
1-
import requests
2-
1+
from pathlib import Path
32
from typing import List
3+
44
from thefuzz import fuzz
5-
from bs4 import BeautifulSoup
6-
from pathlib import Path
75

86

97
def _match_percentage(str1: str, str2: str, score_cutoff: float = 0) -> float:
@@ -71,47 +69,6 @@ def _create_song_title(song_name: str, song_artists: List[str]) -> str:
7169
return f"{joined_artists} - {song_name}"
7270

7371

74-
def _get_song_lyrics(song_name: str, song_artists: List[str]) -> str:
75-
"""
76-
`str` `song_name` : name of song
77-
78-
`list<str>` `song_artists` : list containing name of contributing artists
79-
80-
RETURNS `str`: Lyrics of the song.
81-
82-
Gets the metadata of the song.
83-
"""
84-
85-
headers = {
86-
"Authorization": "Bearer alXXDbPZtK1m2RrZ8I4k2Hn8Ahsd0Gh_o076HYvcdlBvmc0ULL1H8Z8xRlew5qaG",
87-
}
88-
api_search_url = "https://api.genius.com/search"
89-
search_query = f'{song_name} {", ".join(song_artists)}'
90-
91-
try:
92-
api_response = requests.get(
93-
api_search_url, params={"q": search_query}, headers=headers
94-
).json()
95-
96-
song_id = api_response["response"]["hits"][0]["result"]["id"]
97-
song_api_url = f"https://api.genius.com/songs/{song_id}"
98-
99-
api_response = requests.get(song_api_url, headers=headers).json()
100-
101-
song_url = api_response["response"]["song"]["url"]
102-
103-
genius_page = requests.get(song_url)
104-
soup = BeautifulSoup(genius_page.text, "html.parser")
105-
lyrics_div = soup.select_one("div.lyrics")
106-
107-
if lyrics_div is not None:
108-
return lyrics_div.get_text().strip()
109-
110-
return ""
111-
except: # noqa: E722
112-
return ""
113-
114-
11572
def _sanitize_filename(input_str: str) -> str:
11673
output = input_str
11774

0 commit comments

Comments
 (0)