Skip to content

Commit 19afd9e

Browse files
authored
[GlomexEmbed] Avoid large match objects
Closes yt-dlp#2512. Authored by: zmousm
1 parent: b72270d; commit: 19afd9e

File tree

1 file changed

+7
-2
lines changed

1 file changed

+7
-2
lines changed

yt_dlp/extractor/glomex.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -198,8 +198,13 @@ def _extract_urls(cls, webpage, origin_url):
198198
)+</script>
199199
)''' % {'quot_re': r'["\']', 'url_re': VALID_SRC}
200200

201-
for mobj in re.finditer(EMBED_RE, webpage):
202-
mdict = mobj.groupdict()
201+
for mtup in re.findall(EMBED_RE, webpage):
202+
# re.finditer causes a memory spike. See https://github.com/yt-dlp/yt-dlp/issues/2512
203+
mdict = dict(zip((
204+
'url', '_',
205+
'html_tag', '_', 'integration_html', '_', 'id_html', '_', 'glomex_player',
206+
'script_tag', '_', '_', 'integration_js', '_', 'id_js',
207+
), mtup))
203208
if mdict.get('url'):
204209
url = unescapeHTML(mdict['url'])
205210
if not cls.suitable(url):

0 commit comments

Comments
 (0)