Skip to content

Commit 3451660

Browse files
authored
bugfix: artist matching improvements (spotDL#1417)
1 parent e00d344 commit 3451660

9 files changed

+44836
-27917
lines changed

spotdl/providers/provider_utils.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def _match_percentage(str1: str, str2: str, score_cutoff: float = 0) -> float:
2121

2222
# ! this will throw an error if either string contains a UTF-8 encoded emoji
2323
try:
24-
return fuzz.partial_ratio(str1, str2, score_cutoff=score_cutoff)
24+
return fuzz.partial_ratio(str1, str2, processor=None, score_cutoff=score_cutoff)
2525

2626
# ! we build new strings that contain only alphanumerical characters and spaces
2727
# ! and return the partial_ratio of that
@@ -38,7 +38,7 @@ def _match_percentage(str1: str, str2: str, score_cutoff: float = 0) -> float:
3838
if each_letter.isalnum() or each_letter.isspace()
3939
)
4040

41-
return fuzz.partial_ratio(new_str1, new_str2, score_cutoff=score_cutoff)
41+
return fuzz.partial_ratio(new_str1, new_str2, processor=None, score_cutoff=score_cutoff)
4242

4343

4444
def _parse_duration(duration: str) -> float:
@@ -135,7 +135,9 @@ def _parse_path_template(path_template, song_object, output_format, short=False)
135135
artist=_sanitize_filename(song_object.contributing_artists[0]),
136136
title=_sanitize_filename(song_object.song_name),
137137
album=_sanitize_filename(song_object.album_name),
138-
playlist=_sanitize_filename(song_object.playlist_name) if song_object.playlist_name else "",
138+
playlist=_sanitize_filename(song_object.playlist_name)
139+
if song_object.playlist_name
140+
else "",
139141
artists=_sanitize_filename(
140142
", ".join(song_object.contributing_artists)
141143
if short is False

spotdl/providers/ytm_provider.py

Lines changed: 11 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -154,45 +154,38 @@ def _order_ytm_results(
154154

155155
# Find artist match
156156
# ! match = (no. of artist names in result) / (no. of artist names on Spotify) * 100
157-
artist_match_number = 0
157+
artist_match_number = 0.0
158158

159159
# ! we use fuzzy matching because YouTube spellings might be mucked up
160160
if result["type"] == "song":
161161
for artist in song_artists:
162-
if _match_percentage(
163-
unidecode(artist.lower()), unidecode(result["artist"]).lower(), 85
164-
):
165-
artist_match_number += 1
162+
artist_match_number += _match_percentage(
163+
unidecode(artist.lower()), unidecode(result["artist"]).lower()
164+
)
166165
else:
167166
# ! i.e if video
168167
for artist in song_artists:
169168
# ! something like _match_percentage('rionos', 'aiobahn, rionos Motivation
170169
# ! (remix)') would return 100, so we're absolutely correct in matching
171170
# ! artists to song name.
172-
if _match_percentage(
173-
unidecode(artist.lower()), unidecode(result["name"]).lower(), 85
174-
):
175-
artist_match_number += 1
171+
artist_match_number += _match_percentage(
172+
unidecode(artist.lower()), unidecode(result["name"]).lower()
173+
)
176174

177175
# we didn't find the artist in the video title, so we fall back to
178176
# detecting song artist in the channel name
179177
# I am not sure if this won't create false positives
180178
if artist_match_number == 0:
181179
for artist in song_artists:
182-
if _match_percentage(
180+
artist_match_number += _match_percentage(
183181
unidecode(artist.lower()),
184182
unidecode(result["artist"].lower()),
185-
85,
186-
):
187-
artist_match_number += 1
183+
)
188184

189-
# ! Skip if there are no artists in common, (else, results like 'Griffith Swank -
190-
# ! Madness' will be the top match for 'Ruelle - Madness')
191-
if artist_match_number == 0:
185+
artist_match = artist_match_number / len(song_artists)
186+
if artist_match < 70:
192187
continue
193188

194-
artist_match = (artist_match_number / len(song_artists)) * 100
195-
196189
song_title = _create_song_title(song_name, song_artists).lower()
197190

198191
# Find name match and drop results below 60%

tests/cassettes/test_download_a_playlist.yaml

Lines changed: 17896 additions & 9580 deletions
Large diffs are not rendered by default.

tests/cassettes/test_download_a_single_song.yaml

Lines changed: 1078 additions & 458 deletions
Large diffs are not rendered by default.

tests/cassettes/test_download_an_album.yaml

Lines changed: 20096 additions & 13208 deletions
Large diffs are not rendered by default.

tests/cassettes/test_download_long_artists_song.yaml

Lines changed: 1701 additions & 1856 deletions
Large diffs are not rendered by default.

tests/cassettes/test_download_single_song.yaml

Lines changed: 1697 additions & 1889 deletions
Large diffs are not rendered by default.

tests/cassettes/test_multiple_elements.yaml

Lines changed: 2350 additions & 903 deletions
Large diffs are not rendered by default.

tests/cassettes/test_search_and_download.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ interactions:
77
Accept-Encoding:
88
- gzip, deflate
99
Authorization:
10-
- Bearer BQBjXTkWDqZSM5LBXmnnjNmgd6G43iNiKTni8W1mriEGpuuEz6zHA2ovAPCO2NFT3mmhad8J_PuRDC6BmfE
10+
- Bearer BQAzYDtBJuYcuT7R6wj4mCduSrbeVHFdATVu6hZIPxK4Spv02qOAoAqOZXSPy4yBtobnSGUv37CLzOmhqXk
1111
Connection:
1212
- keep-alive
1313
Content-Type:
@@ -48,7 +48,7 @@ interactions:
4848
content-type:
4949
- application/json; charset=utf-8
5050
date:
51-
- Fri, 22 Oct 2021 19:20:01 GMT
51+
- Sat, 20 Nov 2021 12:46:27 GMT
5252
server:
5353
- envoy
5454
strict-transport-security:

0 commit comments

Comments
 (0)