diff --git a/CHANGELOG b/CHANGELOG
index 3d12741..52629da 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,7 @@
+Unreleased
+
+* Ignore `data:` URIs in links and image sources when extracting URLs.
+
2.4.0 (2025-09-28)
* Add index to Link (David Venhoff, #202)
diff --git a/linkcheck/__init__.py b/linkcheck/__init__.py
index ba0949e..9967cdb 100644
--- a/linkcheck/__init__.py
+++ b/linkcheck/__init__.py
@@ -32,7 +32,7 @@ def handle_starttag(self, tag, attrs):
self.text += f' [image:{src[0]}] '
def handle_endtag(self, tag):
- if tag == 'a' and self.in_a:
+ if tag == 'a' and self.in_a and not self.url.startswith("data:"):
self.urls.append((self.text[:256], self.url))
self.in_a = False
self.text = ''
@@ -48,7 +48,7 @@ class ImageLister(Lister):
def handle_starttag(self, tag, attrs):
if tag == 'img':
src = [v for k, v in attrs if k == 'src']
- if src:
+ if src and not src[0].startswith("data:"):
self.urls.append(('', src[0]))
diff --git a/linkcheck/tests/test_linkcheck.py b/linkcheck/tests/test_linkcheck.py
index d0b005d..3dd3f57 100644
--- a/linkcheck/tests/test_linkcheck.py
+++ b/linkcheck/tests/test_linkcheck.py
@@ -943,6 +943,19 @@ def test_urls_exceeding_max_length(self):
)
self.assertEqual(Url.objects.all().count(), 1)
+ def test_data_urls_ignored(self):
+ self.assertEqual(Url.objects.all().count(), 0)
+ Book.objects.create(
+ title="My Title",
+ description=(
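+ 'This is a normal link: <a href="https://www.example.org">Example</a>, '
+ 'This is a data link: <a href="data:text/plain;base64,SGVsbG8=">Example 2</a>, '
+ 'This is a data img: <img src="data:image/gif;base64,R0lGODlhAQABAAAAACw=" alt="Example 3">'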
+ )
+ )
+ # Only the normal link is extracted
+ self.assertEqual(Url.objects.all().count(), 1)
+
def test_empty_url_field(self):
"""
Test that URLField empty content is excluded depending on ignore_empty list.