Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
Unreleased

* Ignore `data:` URIs.

2.4.0 (2025-09-28)

* Add index to Link (David Venhoff, #202)
Expand Down
4 changes: 2 additions & 2 deletions linkcheck/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def handle_starttag(self, tag, attrs):
self.text += f' [image:{src[0]}] '

def handle_endtag(self, tag):
if tag == 'a' and self.in_a:
if tag == 'a' and self.in_a and not self.url.startswith("data:"):
self.urls.append((self.text[:256], self.url))
self.in_a = False
self.text = ''
Expand All @@ -48,7 +48,7 @@ class ImageLister(Lister):
def handle_starttag(self, tag, attrs):
    """Record the ``src`` of an ``<img>`` tag unless it is a ``data:`` URI.

    Args:
        tag: Name of the start tag being handled.
        attrs: ``(name, value)`` pairs for the tag's attributes.

    Each recorded image is appended to ``self.urls`` as ``('', src)``,
    using the first ``src`` attribute found on the tag.
    """
    if tag != 'img':
        return
    src = [v for k, v in attrs if k == 'src']
    if not src:
        return
    value = src[0]
    # A valueless attribute (``<img src>``) arrives as None, so guard before
    # calling str methods. URI schemes are case-insensitive, hence lower().
    is_data_uri = (
        isinstance(value, str)
        and value.lstrip().lower().startswith("data:")
    )
    if not is_data_uri:
        # Non-data values (including None) are recorded unchanged.
        self.urls.append(('', value))


Expand Down
13 changes: 13 additions & 0 deletions linkcheck/tests/test_linkcheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -943,6 +943,19 @@ def test_urls_exceeding_max_length(self):
)
self.assertEqual(Url.objects.all().count(), 1)

def test_data_urls_ignored(self):
    """Check that ``data:`` URIs in links and images do not produce Url rows.

    The description holds one ordinary ``https`` link, one ``<a>`` whose
    ``href`` is a ``data:`` URI and one ``<img>`` whose ``src`` is a
    ``data:`` URI. After the Book is created exactly one Url must exist.
    """
    self.assertEqual(Url.objects.all().count(), 0)
    Book.objects.create(
        title="My Title",
        description=(
            'This is a normal link: <a href="https://www.example.org">Example</a>, '
            'This is a data link: '
            '<a href="data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==">Example 2</a>, '
            'This is a data img: '
            '<img src="data:image/gif;base64,R0lGODlhAQABAAAAACw=">'
        ),
    )
    # Only the normal link is extracted
    self.assertEqual(Url.objects.all().count(), 1)

def test_empty_url_field(self):
"""
Test that URLField empty content is excluded depending on ignore_empty list.
Expand Down
Loading