From 05c452dd1e02cf4fe0fe3e29a41834768f6bf65b Mon Sep 17 00:00:00 2001 From: andybak Date: Mon, 12 Jun 2017 14:05:17 +0100 Subject: [PATCH 001/188] handle cases where get_absolute_url returns None. Remove pointless test for empty queryset. --- linkcheck/listeners.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/linkcheck/listeners.py b/linkcheck/listeners.py index 3e41d84..e3714dd 100644 --- a/linkcheck/listeners.py +++ b/linkcheck/listeners.py @@ -141,11 +141,11 @@ def instance_pre_save(sender, instance, ModelCls=linklist_cls.model, **kwargs): if previous_url == current_url: return else: - old_urls = Url.objects.filter(url__startswith=previous_url) - if old_urls: + if previous_url: + old_urls = Url.objects.filter(url__startswith=previous_url) old_urls.update(status=False, message='Broken internal link') - new_urls = Url.objects.filter(url__startswith=current_url) - if new_urls: + if current_url: + new_urls = Url.objects.filter(url__startswith=current_url) # Mark these urls' status as False, so that post_save will check them new_urls.update(status=False, message='Should be checked now!') From 27e424037720ab3be0847315f0ea3d0e973edb7c Mon Sep 17 00:00:00 2001 From: andybak Date: Mon, 12 Jun 2017 14:10:08 +0100 Subject: [PATCH 002/188] Be conservative and assume empty strings are valid urls (could possibly mean the root url in some case) --- linkcheck/listeners.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/linkcheck/listeners.py b/linkcheck/listeners.py index e3714dd..04668c9 100644 --- a/linkcheck/listeners.py +++ b/linkcheck/listeners.py @@ -141,10 +141,10 @@ def instance_pre_save(sender, instance, ModelCls=linklist_cls.model, **kwargs): if previous_url == current_url: return else: - if previous_url: + if previous_url is not None: old_urls = Url.objects.filter(url__startswith=previous_url) old_urls.update(status=False, message='Broken internal link') - if current_url: + if current_url is not None: new_urls = 
Url.objects.filter(url__startswith=current_url) # Mark these urls' status as False, so that post_save will check them new_urls.update(status=False, message='Should be checked now!') From 146542b9887da5b414365f5d7ff9499fed1fdb0a Mon Sep 17 00:00:00 2001 From: Chen Zhe Date: Wed, 28 Jun 2017 12:02:27 +0800 Subject: [PATCH 003/188] do not report tel:{phone number} links as invalid --- linkcheck/models.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/linkcheck/models.py b/linkcheck/models.py index 6c5d12b..3b85e08 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -102,6 +102,8 @@ def type(self): return 'external' if self.url.startswith('mailto'): return 'mailto' + if self.url.startswith('tel'): + return 'phone' elif str(self.url)=='': return 'empty' elif self.url.startswith('#'): @@ -193,6 +195,10 @@ def _check_internal(self, tested_url): self.status = None self.message = 'Email link (not automatically checked)' + elif tested_url.startswith('tel:'): + self.status = None + self.message = 'Phone number (not automatically checked)' + elif tested_url.startswith('#'): self.status = None self.message = 'Link to within the same page (not automatically checked)' From d63de2b6869835fafab9876536aae9cfa3c550a7 Mon Sep 17 00:00:00 2001 From: Chen Zhe Date: Wed, 28 Jun 2017 12:29:46 +0800 Subject: [PATCH 004/188] add a setting to control if we follow redirect for internal links --- linkcheck/linkcheck_settings.py | 1 + linkcheck/models.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/linkcheck/linkcheck_settings.py b/linkcheck/linkcheck_settings.py index 9f1bde5..b51fd59 100644 --- a/linkcheck/linkcheck_settings.py +++ b/linkcheck/linkcheck_settings.py @@ -58,3 +58,4 @@ RESULTS_PER_PAGE = getattr(settings, 'LINKCHECK_RESULTS_PER_PAGE', 500) SITE_DOMAINS = getattr(settings, 'LINKCHECK_SITE_DOMAINS', []) DISABLE_LISTENERS = getattr(settings, 'LINKCHECK_DISABLE_LISTENERS', False) +FOLLOW_REDIRECT = getattr(settings, 
'LINKCHECK_FOLLOW_REDIRECT', False) \ No newline at end of file diff --git a/linkcheck/models.py b/linkcheck/models.py index 3b85e08..f4dffb3 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -38,6 +38,7 @@ EXTERNAL_REGEX_STRING, EXTERNAL_RECHECK_INTERVAL, LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT, + FOLLOW_REDIRECT, ) logger = logging.getLogger('linkcheck') @@ -239,7 +240,7 @@ def _check_internal(self, tested_url): settings.PREPEND_WWW = False c = Client() c.handler = LinkCheckHandler() - response = c.get(tested_url) + response = c.get(tested_url, follow=FOLLOW_REDIRECT) if USE_REVERSION: # using test client will clear the RevisionContextManager stack. revision_context_manager.start() From cc90b454b5b6fe5a49776df134d4b708092ada39 Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Wed, 28 Jun 2017 11:29:19 +0200 Subject: [PATCH 005/188] Add Django 1.11 target in Travis config --- .travis.yml | 8 +++++--- runtests.py | 6 ++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 093e330..c0353c9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,10 +2,12 @@ language: python python: - 2.7 - 3.4 + - 3.5 env: - - DJANGO=django==1.8.17 - - DJANGO=django==1.9.12 - - DJANGO=django==1.10.5 + - DJANGO=django==1.8.18 + - DJANGO=django==1.9.13 + - DJANGO=django==1.10.7 + - DJANGO=django==1.11.2 matrix: fast_finish: true install: diff --git a/runtests.py b/runtests.py index 2e33c13..6f7bcb0 100644 --- a/runtests.py +++ b/runtests.py @@ -27,6 +27,12 @@ 'TEMPLATES': [{ 'BACKEND': 'django.template.backends.django.DjangoTemplates', 'APP_DIRS': True, + 'OPTIONS': { + 'context_processors': [ + 'django.contrib.auth.context_processors.auth', + 'django.template.context_processors.static', + ], + }, }], } # Django < 1.10 compatibility From 8c8e7113d1bbaca912327f15802771a2c5a0f4ce Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Wed, 28 Jun 2017 12:09:56 +0200 Subject: [PATCH 006/188] Simplified listener registrations Instead of creating 
listener functions inside a for loop, take advantage of the _linklist model annotation to access linklists. --- linkcheck/listeners.py | 269 +++++++++++++++++++++-------------------- 1 file changed, 136 insertions(+), 133 deletions(-) diff --git a/linkcheck/listeners.py b/linkcheck/listeners.py index cb22487..f12de5d 100644 --- a/linkcheck/listeners.py +++ b/linkcheck/listeners.py @@ -29,6 +29,7 @@ tasks_queue = Queue.LifoQueue() worker_running = False +tests_running = len(sys.argv) > 1 and sys.argv[1] == 'test' or sys.argv[0].endswith('runtests.py') def linkcheck_worker(): @@ -49,150 +50,152 @@ def start_worker(): t.start() -listeners = [] +def check_instance_links(sender, instance, **kwargs): + """ + When an object is saved: + new Link/Urls are created, checked + When an object is modified: + new link/urls are created, checked + existing link/urls are checked + Removed links are deleted + """ + linklist_cls = sender._linklist -# 1. register listeners for the objects that contain Links + def do_check_instance_links(sender, instance, wait=False): + # On some installations, this wait time might be enough for the + # thread transaction to account for the object change (GH #41). + # A candidate for the future post_commit signal. -for linklist_name, linklist_cls in apps.get_app_config('linkcheck').all_linklists.items(): + global worker_running - def check_instance_links(sender, instance, linklist_cls=linklist_cls, **kwargs): - """ - When an object is saved: - new Link/Urls are created, checked - - When an object is modified: - new link/urls are created, checked - existing link/urls are checked - Removed links are deleted - """ - - def do_check_instance_links(sender, instance, linklist_cls=linklist_cls, wait=False): - # On some installations, this wait time might be enough for the - # thread transaction to account for the object change (GH #41). - # A candidate for the future post_commit signal. 
- - global worker_running - - if wait: - time.sleep(0.1) - with update_lock: - content_type = linklist_cls.content_type() - new_links = [] - old_links = Link.objects.filter(content_type=content_type, object_id=instance.pk) - - linklists = linklist_cls().get_linklist(extra_filter={'pk':instance.pk,}) - - if not linklists: - # This object is no longer watched by linkcheck according to object_filter - links = [] - else: - linklist = linklists[0] - links = linklist['urls']+linklist['images'] - - for link in links: - # url structure = (field, link text, url) - url = link[2] - internal_hash = False - if url.startswith('#'): - internal_hash = url - url = instance.get_absolute_url() + url - u, created = Url.objects.get_or_create(url=url) - l, created = Link.objects.get_or_create(url=u, field=link[0], text=link[1], content_type=content_type, object_id=instance.pk) - new_links.append(l.id) - u.still_exists = True - if internal_hash: - setattr(u, '_internal_hash', internal_hash) - setattr(u, '_instance', instance) - u.check_url() - - gone_links = old_links.exclude(id__in=new_links) - gone_links.delete() - - # Don't run in a separate thread if we are running tests - if len(sys.argv) > 1 and sys.argv[1] == 'test' or sys.argv[0].endswith('runtests.py'): - do_check_instance_links(sender, instance, linklist_cls) - else: - tasks_queue.put({'target': do_check_instance_links, 'args': (sender, instance, linklist_cls, True), 'kwargs': {}}) - start_worker() + if wait: + time.sleep(0.1) + with update_lock: + content_type = linklist_cls.content_type() + new_links = [] + old_links = Link.objects.filter(content_type=content_type, object_id=instance.pk) + + linklists = linklist_cls().get_linklist(extra_filter={'pk':instance.pk,}) + + if not linklists: + # This object is no longer watched by linkcheck according to object_filter + links = [] + else: + linklist = linklists[0] + links = linklist['urls']+linklist['images'] + + for link in links: + # url structure = (field, link text, url) + url = 
link[2] + internal_hash = False + if url.startswith('#'): + internal_hash = url + url = instance.get_absolute_url() + url + u, created = Url.objects.get_or_create(url=url) + l, created = Link.objects.get_or_create(url=u, field=link[0], text=link[1], content_type=content_type, object_id=instance.pk) + new_links.append(l.id) + u.still_exists = True + if internal_hash: + setattr(u, '_internal_hash', internal_hash) + setattr(u, '_instance', instance) + u.check_url() + + gone_links = old_links.exclude(id__in=new_links) + gone_links.delete() + + # Don't run in a separate thread if we are running tests + if tests_running: + do_check_instance_links(sender, instance) + else: + tasks_queue.put({ + 'target': do_check_instance_links, + 'args': (sender, instance, True), + 'kwargs': {} + }) + start_worker() + + +def delete_instance_links(sender, instance, **kwargs): + """ + Delete all links belonging to a model instance when that instance is deleted + """ + linklist_cls = sender._linklist + content_type = linklist_cls.content_type() + old_links = Link.objects.filter(content_type=content_type, object_id=instance.pk) + old_links.delete() + + +def instance_pre_save(sender, instance, **kwargs): + if not instance.pk: + # Ignore unsaved instances + return + current_url = instance.get_absolute_url() + previous_url = sender.objects.get(pk=instance.pk).get_absolute_url() + setattr(instance, '__previous_url', previous_url) + if previous_url == current_url: + return + else: + if previous_url is not None: + old_urls = Url.objects.filter(url__startswith=previous_url) + old_urls.update(status=False, message='Broken internal link') + if current_url is not None: + new_urls = Url.objects.filter(url__startswith=current_url) + # Mark these urls' status as False, so that post_save will check them + new_urls.update(status=False, message='Should be checked now!') + + +def instance_post_save(sender, instance, **kwargs): + def do_instance_post_save(sender, instance, **kwargs): + current_url = 
instance.get_absolute_url() + previous_url = getattr(instance, '__previous_url', None) + # We assume returning None from get_absolute_url means that this instance doesn't have a URL + # Not sure if we should do the same for '' as this could refer to '/' + if current_url is not None and current_url != previous_url: + linklist_cls = sender._linklist + active = linklist_cls.objects().filter(pk=instance.pk).count() + + if kwargs['created'] or (not active): + new_urls = Url.objects.filter(url__startswith=current_url) + else: + new_urls = Url.objects.filter(status=False).filter(url__startswith=current_url) + if new_urls: + for url in new_urls: + url.check_url() - listeners.append(check_instance_links) - model_signals.post_save.connect(listeners[-1], sender=linklist_cls.model) + if tests_running: + do_instance_post_save(sender, instance, **kwargs) + else: + tasks_queue.put({ + 'target': do_instance_post_save, + 'args': (sender, instance), + 'kwargs': kwargs + }) + start_worker() - def delete_instance_links(sender, instance, linklist_cls=linklist_cls, **kwargs): - """ - Delete all links belonging to a model instance when that instance is deleted - """ - content_type = linklist_cls.content_type() - old_links = Link.objects.filter(content_type=content_type, object_id=instance.pk) - old_links.delete() - listeners.append(delete_instance_links) - model_signals.post_delete.connect(listeners[-1], sender=linklist_cls.model) +def instance_pre_delete(sender, instance, **kwargs): + instance.linkcheck_deleting = True + deleted_url = instance.get_absolute_url() + if deleted_url: + old_urls = Url.objects.filter(url__startswith=deleted_url).exclude(status=False) + if old_urls: + old_urls.update(status=False, message='Broken internal link') -# 2. register listeners for the objects that are targets of Links, only when get_absolute_url() is defined for the model - if getattr(linklist_cls.model, 'get_absolute_url', None): +# 1. 
register listeners for the objects that contain Links +for linklist_name, linklist_cls in apps.get_app_config('linkcheck').all_linklists.items(): + model_signals.post_save.connect(check_instance_links, sender=linklist_cls.model) + model_signals.post_delete.connect(delete_instance_links, sender=linklist_cls.model) - def instance_pre_save(sender, instance, ModelCls=linklist_cls.model, **kwargs): - if instance.pk: # Ignore unsaved instances - current_url = instance.get_absolute_url() - previous = ModelCls.objects.get(pk=instance.pk) - previous_url = previous.get_absolute_url() - setattr(instance, '__previous_url', previous_url) - if previous_url == current_url: - return - else: - if previous_url is not None: - old_urls = Url.objects.filter(url__startswith=previous_url) - old_urls.update(status=False, message='Broken internal link') - if current_url is not None: - new_urls = Url.objects.filter(url__startswith=current_url) - # Mark these urls' status as False, so that post_save will check them - new_urls.update(status=False, message='Should be checked now!') - - listeners.append(instance_pre_save) - model_signals.pre_save.connect(listeners[-1], sender=linklist_cls.model) - - - def instance_post_save(sender, instance, ModelCls=linklist_cls.model, linklist=linklist_cls, **kwargs): - def do_instance_post_save(sender, instance, ModelCls=ModelCls, linklist=linklist_cls, **kwargs): - current_url = instance.get_absolute_url() - previous_url = getattr(instance, '__previous_url', None) - # We assume returning None from get_absolute_url means that this instance doesn't have a URL - # Not sure if we should do the same for '' as this could refer to '/' - if current_url is not None and current_url != previous_url: - - active = linklist.objects().filter(pk=instance.pk).count() - - if kwargs['created'] or (not active): - new_urls = Url.objects.filter(url__startswith=current_url) - else: - new_urls = Url.objects.filter(status=False).filter(url__startswith=current_url) - - if new_urls: 
- for url in new_urls: - url.check_url() - if len(sys.argv)>1 and sys.argv[1] == 'test' or sys.argv[0] == 'runtests.py': - do_instance_post_save(sender, instance, ModelCls, **kwargs) - else: - tasks_queue.put({'target': do_instance_post_save, 'args': (sender, instance, ModelCls,), 'kwargs': kwargs}) - start_worker() - - listeners.append(instance_post_save) - model_signals.post_save.connect(listeners[-1], sender=linklist_cls.model) - - - def instance_pre_delete(sender, instance, ModelCls=linklist_cls.model, **kwargs): - instance.linkcheck_deleting = True - deleted_url = instance.get_absolute_url() - if deleted_url: - old_urls = Url.objects.filter(url__startswith=deleted_url).exclude(status=False) - if old_urls: - old_urls.update(status=False, message='Broken internal link') - listeners.append(instance_pre_delete) - model_signals.pre_delete.connect(listeners[-1], sender=linklist_cls.model) + # 2. register listeners for the objects that are targets of Links, + # only when get_absolute_url() is defined for the model + + if getattr(linklist_cls.model, 'get_absolute_url', None): + model_signals.pre_save.connect(instance_pre_save, sender=linklist_cls.model) + model_signals.post_save.connect(instance_post_save, sender=linklist_cls.model) + model_signals.pre_delete.connect(instance_pre_delete, sender=linklist_cls.model) # Integrate with django-filebrowser if present From 0855c89adbdac26ff3dfdeee6ded8622fb39162f Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Mon, 10 Jul 2017 15:12:58 +0200 Subject: [PATCH 007/188] Added compatibility with current Django master --- linkcheck/dashboard.py | 6 +++++- linkcheck/tests/test_linkcheck.py | 6 +++++- linkcheck/tests/urls.py | 4 ++-- linkcheck/views.py | 6 +++++- runtests.py | 1 - 5 files changed, 17 insertions(+), 6 deletions(-) diff --git a/linkcheck/dashboard.py b/linkcheck/dashboard.py index a1fd2d1..4a7afcd 100644 --- a/linkcheck/dashboard.py +++ b/linkcheck/dashboard.py @@ -1,7 +1,11 @@ from admin_tools.dashboard import 
modules -from django.core.urlresolvers import reverse from linkcheck.views import get_status_message +try: + from django.urls import reverse +except ImportError: # Django < 1.10 + from django.core.urlresolvers import reverse + linkcheck_dashboard_module = modules.LinkList( title="Linkchecker", diff --git a/linkcheck/tests/test_linkcheck.py b/linkcheck/tests/test_linkcheck.py index 39a1081..463ed4d 100644 --- a/linkcheck/tests/test_linkcheck.py +++ b/linkcheck/tests/test_linkcheck.py @@ -10,13 +10,17 @@ from django.conf import settings from django.contrib.auth.models import User from django.core.management import call_command -from django.core.urlresolvers import reverse from django.test import LiveServerTestCase, TestCase from django.test.utils import override_settings from django.utils.six import StringIO from django.utils.six.moves.urllib import request from django.utils.six.moves.urllib.error import HTTPError +try: + from django.urls import reverse +except ImportError: # Django < 1.10 + from django.core.urlresolvers import reverse + from linkcheck.models import Link, Url from linkcheck.views import get_jquery_min_js diff --git a/linkcheck/tests/urls.py b/linkcheck/tests/urls.py index b9cd744..c7bb8d8 100644 --- a/linkcheck/tests/urls.py +++ b/linkcheck/tests/urls.py @@ -7,11 +7,11 @@ from linkcheck.tests.sampleapp import views -handler404 = lambda x: http.HttpResponseNotFound('') +handler404 = lambda x, **kwargs: http.HttpResponseNotFound('') urlpatterns = [ url(http://github.com/r'%5Eadmin/linkcheck/',%20include('linkcheck.urls')), - url(http://github.com/r'%5Eadmin/',%20include(admin.site.urls)), + url(http://github.com/r'%5Eadmin/',%20admin.site.urls), url(http://github.com/r'%5Epublic/',%20views.http_response,%20%7B'code':%20'200'%7D), url(http://github.com/r'%5Ehttp/(?P%3Ccode%3E\d+)/$', views.http_response), url(http://github.com/r'%5Ehttp/(?P%3Ccode%3E\d+)/rückmeldung/$', views.http_response), diff 
--git a/linkcheck/views.py b/linkcheck/views.py index a3a8371..2552c35 100644 --- a/linkcheck/views.py +++ b/linkcheck/views.py @@ -10,11 +10,15 @@ from django.contrib.contenttypes.models import ContentType from django.core.exceptions import ObjectDoesNotExist from django.core.paginator import Paginator -from django.core.urlresolvers import reverse, NoReverseMatch from django.http import HttpResponse from django.shortcuts import render from django.views.decorators.csrf import csrf_exempt +try: + from django.urls import reverse, NoReverseMatch +except ImportError: # Django < 1.10 + from django.core.urlresolvers import reverse, NoReverseMatch + from linkcheck import update_lock from linkcheck.linkcheck_settings import RESULTS_PER_PAGE from linkcheck.models import Link diff --git a/runtests.py b/runtests.py index 6f7bcb0..d23c9bc 100644 --- a/runtests.py +++ b/runtests.py @@ -22,7 +22,6 @@ 'django.middleware.common.CommonMiddleware', 'django.middleware.csrf.CsrfViewMiddleware', 'django.contrib.auth.middleware.AuthenticationMiddleware', - 'django.contrib.auth.middleware.SessionAuthenticationMiddleware', ], 'TEMPLATES': [{ 'BACKEND': 'django.template.backends.django.DjangoTemplates', From 3fddd44e8d154537e9b52d5b3fdff851d1a0f919 Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Mon, 10 Jul 2017 15:35:34 +0200 Subject: [PATCH 008/188] Fixed #73 -- Evaluated checklinks limit after real checks are done --- linkcheck/tests/test_linkcheck.py | 9 +++++---- linkcheck/utils.py | 6 ++---- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/linkcheck/tests/test_linkcheck.py b/linkcheck/tests/test_linkcheck.py index 463ed4d..b756978 100644 --- a/linkcheck/tests/test_linkcheck.py +++ b/linkcheck/tests/test_linkcheck.py @@ -212,7 +212,8 @@ def setUp(self): def test_checklinks_command(self): Book.objects.create(title='My Title', description=""" - Here's a link: Example, + Here's an external link: External, + an internal link: Internal, and an image: logo""") out = 
StringIO() @@ -220,7 +221,7 @@ def test_checklinks_command(self): self.assertEqual( out.getvalue(), "Checking all links that haven't been tested for 10080 minutes.\n" - "0 internal URLs and 0 external URLs have been checked.\n" + "1 internal URLs and 0 external URLs have been checked.\n" ) yesterday = datetime.now() - timedelta(days=1) @@ -230,7 +231,7 @@ def test_checklinks_command(self): self.assertEqual( out.getvalue(), "Checking all links that haven't been tested for 20 minutes.\n" - "0 internal URLs and 2 external URLs have been checked.\n" + "1 internal URLs and 2 external URLs have been checked.\n" ) Url.objects.all().update(last_checked=yesterday) @@ -240,7 +241,7 @@ def test_checklinks_command(self): out.getvalue(), "Checking all links that haven't been tested for 20 minutes.\n" "Will run maximum of 1 checks this run.\n" - "0 internal URLs and 1 external URLs have been checked.\n" + "1 internal URLs and 1 external URLs have been checked.\n" ) diff --git a/linkcheck/utils.py b/linkcheck/utils.py index 0dc3ced..1a87828 100644 --- a/linkcheck/utils.py +++ b/linkcheck/utils.py @@ -78,14 +78,12 @@ def check_links(external_recheck_interval=10080, limit=-1, check_internal=True, recheck_datetime = datetime.now() - timedelta(minutes=external_recheck_interval) urls = urls.exclude(last_checked__gt=recheck_datetime) - # If limit is specified set the limit - if limit and limit > -1: - urls = urls[:limit] - check_count = 0 for u in urls: status = u.check_url(http://github.com/check_internal=check_internal,%20check_external=check_external) check_count += 1 if status is not None else 0 + if limit > -1 and check_count >= limit: + break return check_count From 16bfcf95e7e7a3e4a0a2b81805f502a062093853 Mon Sep 17 00:00:00 2001 From: andybak Date: Thu, 13 Jul 2017 15:14:56 +0100 Subject: [PATCH 009/188] Allow separate setting for redirect url length (Can be longer than 255 on MySQL and not hit the UNIQUE limit) --- linkcheck/linkcheck_settings.py | 1 + 
linkcheck/models.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/linkcheck/linkcheck_settings.py b/linkcheck/linkcheck_settings.py index b51fd59..e9c503f 100644 --- a/linkcheck/linkcheck_settings.py +++ b/linkcheck/linkcheck_settings.py @@ -54,6 +54,7 @@ LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT = getattr(settings, 'LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT', 10) MAX_CHECKS_PER_RUN = getattr(settings, 'LINKCHECK_MAX_CHECKS_PER_RUN', -1) MAX_URL_LENGTH = getattr(settings, 'LINKCHECK_MAX_URL_LENGTH', 255) +MAX_REDIRECT_URL_LENGTH = getattr(settings, 'LINKCHECK_MAX_REDIRECT_URL_LENGTH', MAX_URL_LENGTH) MEDIA_PREFIX = getattr(settings, 'LINKCHECK_MEDIA_PREFIX', '/media/') RESULTS_PER_PAGE = getattr(settings, 'LINKCHECK_RESULTS_PER_PAGE', 500) SITE_DOMAINS = getattr(settings, 'LINKCHECK_SITE_DOMAINS', []) diff --git a/linkcheck/models.py b/linkcheck/models.py index f4dffb3..1b8a1f2 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -33,6 +33,7 @@ from .linkcheck_settings import ( MAX_URL_LENGTH, + MAX_REDIRECT_URL_LENGTH, MEDIA_PREFIX, SITE_DOMAINS, EXTERNAL_REGEX_STRING, @@ -95,7 +96,7 @@ class Url(models.Model): status = models.NullBooleanField() message = models.CharField(max_length=1024, blank=True, null=True) still_exists = models.BooleanField(default=False) - redirect_to = models.CharField(max_length=MAX_URL_LENGTH, default='') + redirect_to = models.CharField(max_length=MAX_REDIRECT_URL_LENGTH, default='') @property def type(self): From 5bcc19944cfa2f3989298c123686e20ad35e5b6c Mon Sep 17 00:00:00 2001 From: andybak Date: Mon, 17 Jul 2017 13:47:39 +0100 Subject: [PATCH 010/188] Truncate redirect urls. It's not idea but it's better than failing to save the entire instance. 
See issue #75 --- linkcheck/models.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/linkcheck/models.py b/linkcheck/models.py index 1b8a1f2..ff35133 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -359,7 +359,9 @@ def _check_external(self, tested_url, external_recheck_interval): self.message = 'Other Error: %s' % e else: if response.getcode() == 301 and response.geturl() != url: - self.redirect_to = response.geturl() + redirect_url = response.geturl() + redirect_url = redirect_url[:MAX_REDIRECT_URL_LENGTH] + self.redirect_to = redirect_url elif self.redirect_to: self.redirect_to = '' From 20976017a13a9602505e13f6b0be309d239ba407 Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Mon, 17 Jul 2017 19:52:47 +0200 Subject: [PATCH 011/188] Fixed #75 -- Set redirect_to as TextField This allows to store URLs without length limits. Not a problem here for a field without indexes. --- .../0003_redirect_to_as_textfield.py | 19 +++++++++++++++++++ linkcheck/models.py | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 linkcheck/migrations/0003_redirect_to_as_textfield.py diff --git a/linkcheck/migrations/0003_redirect_to_as_textfield.py b/linkcheck/migrations/0003_redirect_to_as_textfield.py new file mode 100644 index 0000000..2a124e0 --- /dev/null +++ b/linkcheck/migrations/0003_redirect_to_as_textfield.py @@ -0,0 +1,19 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('linkcheck', '0002_url_redirect_to'), + ] + + operations = [ + migrations.AlterField( + model_name='url', + name='redirect_to', + field=models.TextField(blank=True), + ), + ] diff --git a/linkcheck/models.py b/linkcheck/models.py index ff35133..7196f23 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -96,7 +96,7 @@ class Url(models.Model): status = models.NullBooleanField() message = 
models.CharField(max_length=1024, blank=True, null=True) still_exists = models.BooleanField(default=False) - redirect_to = models.CharField(max_length=MAX_REDIRECT_URL_LENGTH, default='') + redirect_to = models.TextField(blank=True) @property def type(self): From 22c1c89e9fb992e4541d37aa2b91d879bd731279 Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Mon, 17 Jul 2017 19:57:53 +0200 Subject: [PATCH 012/188] Revert "Allow separate setting for redirect url length (Can be longer than 255 on MySQL and not hit the UNIQUE limit)" This reverts commit 16bfcf95e7e7a3e4a0a2b81805f502a062093853. --- linkcheck/linkcheck_settings.py | 1 - linkcheck/models.py | 1 - 2 files changed, 2 deletions(-) diff --git a/linkcheck/linkcheck_settings.py b/linkcheck/linkcheck_settings.py index e9c503f..b51fd59 100644 --- a/linkcheck/linkcheck_settings.py +++ b/linkcheck/linkcheck_settings.py @@ -54,7 +54,6 @@ LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT = getattr(settings, 'LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT', 10) MAX_CHECKS_PER_RUN = getattr(settings, 'LINKCHECK_MAX_CHECKS_PER_RUN', -1) MAX_URL_LENGTH = getattr(settings, 'LINKCHECK_MAX_URL_LENGTH', 255) -MAX_REDIRECT_URL_LENGTH = getattr(settings, 'LINKCHECK_MAX_REDIRECT_URL_LENGTH', MAX_URL_LENGTH) MEDIA_PREFIX = getattr(settings, 'LINKCHECK_MEDIA_PREFIX', '/media/') RESULTS_PER_PAGE = getattr(settings, 'LINKCHECK_RESULTS_PER_PAGE', 500) SITE_DOMAINS = getattr(settings, 'LINKCHECK_SITE_DOMAINS', []) diff --git a/linkcheck/models.py b/linkcheck/models.py index 7196f23..52c4c16 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -33,7 +33,6 @@ from .linkcheck_settings import ( MAX_URL_LENGTH, - MAX_REDIRECT_URL_LENGTH, MEDIA_PREFIX, SITE_DOMAINS, EXTERNAL_REGEX_STRING, From c7d106cf99e07e2651bbf271c5185ddb0066111b Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Mon, 17 Jul 2017 19:58:13 +0200 Subject: [PATCH 013/188] Revert "Truncate redirect urls. It's not idea but it's better than failing to save the entire instance. 
See issue #75" This reverts commit 5bcc19944cfa2f3989298c123686e20ad35e5b6c. --- linkcheck/models.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/linkcheck/models.py b/linkcheck/models.py index 52c4c16..72f2df1 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -358,9 +358,7 @@ def _check_external(self, tested_url, external_recheck_interval): self.message = 'Other Error: %s' % e else: if response.getcode() == 301 and response.geturl() != url: - redirect_url = response.geturl() - redirect_url = redirect_url[:MAX_REDIRECT_URL_LENGTH] - self.redirect_to = redirect_url + self.redirect_to = response.geturl() elif self.redirect_to: self.redirect_to = '' From 14ba60ae85ca941606efd9d2b10d3826b51a732e Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Tue, 12 Sep 2017 20:40:35 +0200 Subject: [PATCH 014/188] Fixed #78 -- Follow internal redirects to provide target status --- linkcheck/models.py | 11 +++++++++-- linkcheck/tests/test_linkcheck.py | 10 +++++++--- linkcheck/tests/urls.py | 2 ++ 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/linkcheck/models.py b/linkcheck/models.py index ff35133..72afe38 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -264,8 +264,15 @@ def _check_internal(self, tested_url): self.status = False elif response.status_code == 302 or response.status_code == 301: - self.status = None - self.message = 'This link redirects: code %d (not automatically checked)' % (response.status_code, ) + redir_response = c.get(tested_url, follow=True) + if redir_response.status_code == 200: + redir_state = 'Working redirect' + self.status = True + else: + redir_state = 'Broken redirect' + self.status = False + self.message = 'This link redirects: code %d (%s)' % ( + response.status_code, redir_state) else: self.message = 'Broken internal link' settings.PREPEND_WWW = old_prepend_setting diff --git a/linkcheck/tests/test_linkcheck.py b/linkcheck/tests/test_linkcheck.py index b756978..69f3d62 100644 --- 
a/linkcheck/tests/test_linkcheck.py +++ b/linkcheck/tests/test_linkcheck.py @@ -91,11 +91,15 @@ def test_internal_check_anchor(self): self.assertEqual(uv.status, None) self.assertEqual(uv.message, 'Link to within the same page (not automatically checked)') - def test_internal_check_view_301(self): + def test_internal_check_view_redirect(self): uv = Url(url="/admin/linkcheck", still_exists=True) uv.check_url() - self.assertEqual(uv.status, None) - self.assertEqual(uv.message, 'This link redirects: code 301 (not automatically checked)') + self.assertEqual(uv.status, True) + self.assertEqual(uv.message, 'This link redirects: code 301 (Working redirect)') + uv = Url(url="/http/brokenredirect/", still_exists=True) + uv.check_url() + self.assertEqual(uv.status, False) + self.assertEqual(uv.message, 'This link redirects: code 302 (Broken redirect)') def test_internal_check_found(self): uv = Url(url="/public/", still_exists=True) diff --git a/linkcheck/tests/urls.py b/linkcheck/tests/urls.py index c7bb8d8..672068b 100644 --- a/linkcheck/tests/urls.py +++ b/linkcheck/tests/urls.py @@ -4,6 +4,7 @@ from django.conf.urls import include, url from django.contrib import admin from django import http +from django.views.generic import RedirectView from linkcheck.tests.sampleapp import views @@ -16,4 +17,5 @@ url(http://github.com/r'%5Ehttp/(?P%3Ccode%3E\d+)/$', views.http_response), url(http://github.com/r'%5Ehttp/(?P%3Ccode%3E\d+)/rückmeldung/$', views.http_response), url(http://github.com/r'%5Ehttp/redirect/(?P%3Ccode%3E\d+)/$', views.http_redirect), + url(http://github.com/r'%5Ehttp/brokenredirect/'),%20RedirectView.as_view(url='/non-existent/')), ] From 492781e1553c9854ad84a1dde0cae4f4a86eafdd Mon Sep 17 00:00:00 2001 From: Chen Zhe Date: Fri, 15 Sep 2017 14:53:48 +0800 Subject: [PATCH 015/188] remove follow_redirect setting --- linkcheck/linkcheck_settings.py | 1 - linkcheck/models.py | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) 
diff --git a/linkcheck/linkcheck_settings.py b/linkcheck/linkcheck_settings.py index b51fd59..9f1bde5 100644 --- a/linkcheck/linkcheck_settings.py +++ b/linkcheck/linkcheck_settings.py @@ -58,4 +58,3 @@ RESULTS_PER_PAGE = getattr(settings, 'LINKCHECK_RESULTS_PER_PAGE', 500) SITE_DOMAINS = getattr(settings, 'LINKCHECK_SITE_DOMAINS', []) DISABLE_LISTENERS = getattr(settings, 'LINKCHECK_DISABLE_LISTENERS', False) -FOLLOW_REDIRECT = getattr(settings, 'LINKCHECK_FOLLOW_REDIRECT', False) \ No newline at end of file diff --git a/linkcheck/models.py b/linkcheck/models.py index be0ab3a..ae7edff 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -38,7 +38,6 @@ EXTERNAL_REGEX_STRING, EXTERNAL_RECHECK_INTERVAL, LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT, - FOLLOW_REDIRECT, ) logger = logging.getLogger('linkcheck') @@ -240,7 +239,7 @@ def _check_internal(self, tested_url): settings.PREPEND_WWW = False c = Client() c.handler = LinkCheckHandler() - response = c.get(tested_url, follow=FOLLOW_REDIRECT) + response = c.get(tested_url) if USE_REVERSION: # using test client will clear the RevisionContextManager stack. revision_context_manager.start() From 41c403fa866e68eadf29705f23ba9277026f52ca Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Fri, 15 Sep 2017 09:14:23 +0200 Subject: [PATCH 016/188] Fixed redirection code test on Django < 1.9 The default value of the RedirectView.permanent attribute has changed from True to False in Django 1.9. 
--- linkcheck/tests/test_linkcheck.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/linkcheck/tests/test_linkcheck.py b/linkcheck/tests/test_linkcheck.py index 69f3d62..d18f917 100644 --- a/linkcheck/tests/test_linkcheck.py +++ b/linkcheck/tests/test_linkcheck.py @@ -99,7 +99,8 @@ def test_internal_check_view_redirect(self): uv = Url(url="/http/brokenredirect/", still_exists=True) uv.check_url() self.assertEqual(uv.status, False) - self.assertEqual(uv.message, 'This link redirects: code 302 (Broken redirect)') + redirect_code = 301 if django.VERSION < (1, 9) else 302 + self.assertEqual(uv.message, 'This link redirects: code %d (Broken redirect)' % redirect_code) def test_internal_check_found(self): uv = Url(url="/public/", still_exists=True) From 42faf7bdd7a8bbe185018393c90a708e228fc644 Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Fri, 15 Sep 2017 09:25:12 +0200 Subject: [PATCH 017/188] Set a date-based ordering for link report pages --- linkcheck/views.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/linkcheck/views.py b/linkcheck/views.py index 2552c35..a83f270 100644 --- a/linkcheck/views.py +++ b/linkcheck/views.py @@ -85,17 +85,18 @@ def report(request): link_filter = request.GET.get('filters', 'show_invalid') + qset = Link.objects.order_by('-url__last_checked') if link_filter == 'show_valid': - qset = Link.objects.filter(ignore=False, url__status__exact=True) + qset = qset.filter(ignore=False, url__status__exact=True) report_type = 'Good Links' elif link_filter == 'show_unchecked': - qset = Link.objects.filter(ignore=False, url__last_checked__exact=None) + qset = qset.filter(ignore=False, url__last_checked__exact=None) report_type = 'Untested Links' elif link_filter == 'ignored': - qset = Link.objects.filter(ignore=True) + qset = qset.filter(ignore=True) report_type = 'Ignored Links' else: - qset = Link.objects.filter(ignore=False, url__status__exact=False) + qset = qset.filter(ignore=False, 
url__status__exact=False) report_type = 'Broken Links' paginated_links = Paginator(qset, RESULTS_PER_PAGE, 0, True) From 455cefbc1b093707e7185da069546d1a047225a5 Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Sat, 16 Sep 2017 12:18:06 +0200 Subject: [PATCH 018/188] Updated CHANGELOG for the 1.5 release --- CHANGELOG | 16 +++++++++++----- setup.py | 2 +- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 3583b1a..5be9778 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,8 +1,14 @@ -1.5 (to be released) +1.5 (2017-09-16) +* Added support for `tel:` links. +* For redirecting links, linkcheck now reports the status of the redirect + target (#78). * Dropped South migrations. +* 'Url.redirect_to' was migrated to a TextField to not limit its length (#75). +* Fixed handling of the '--limit' argument of the 'checklinks' command (#73). +* Fixed the task queue of links to check (#69). -1.4 +1.4 (2017-01-13) * Dropped support for Django 1.6 and Django 1.7, the minimal Python version is now Python 2.7. Django 1.10 is also supported. @@ -15,7 +21,7 @@ * A task queue is now used to process link checking, so as to prevent exhaustion of available threads during massive updates. -1.3 +1.3 (2016-06-05) * Django 1.9 compatibility added. * When checking internal links, redirects are not followed any longer. @@ -29,7 +35,7 @@ * Fix - correctly handle tags that are inside tags. * Fix - don't run pre_save if it's a new instance. -1.2 +1.2 (2015-11-13) * Added migration folders (missing in the 1.1 package). Also added support for South migrations (compatibility). @@ -37,7 +43,7 @@ is stored in Url.redirect_to and displayed in the link report. * Better support for URLs containing non-ASCII characters. -1.1 +1.1 (2015-06-03) * Minimal software requirements are now Python 2.6 / Django 1.6 (and South 1.0 if you still use Django 1.6). 
diff --git a/setup.py b/setup.py index beb024f..bc57e5d 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ def read(fname): setup( name='django-linkcheck', - version='1.4', + version='1.5', description="A Django app that will analyze and report on links in any " "model that you register with it.", long_description=read('README.rst'), From d6dfa76226af4b5d42e73417873d5794df58b15e Mon Sep 17 00:00:00 2001 From: andybak Date: Tue, 19 Sep 2017 16:46:19 +0100 Subject: [PATCH 019/188] Check edit permissions when using supplied django_admin_tools dashboard module --- linkcheck/dashboard.py | 7 ++++--- linkcheck/dashboard_extra_modules.py | 16 ++++++++++++++++ 2 files changed, 20 insertions(+), 3 deletions(-) create mode 100644 linkcheck/dashboard_extra_modules.py diff --git a/linkcheck/dashboard.py b/linkcheck/dashboard.py index 4a7afcd..1a66226 100644 --- a/linkcheck/dashboard.py +++ b/linkcheck/dashboard.py @@ -1,4 +1,4 @@ -from admin_tools.dashboard import modules +from .dashboard_extra_modules import PermCheckingLinkList from linkcheck.views import get_status_message try: @@ -7,7 +7,7 @@ from django.core.urlresolvers import reverse -linkcheck_dashboard_module = modules.LinkList( +linkcheck_dashboard_module = PermCheckingLinkList( title="Linkchecker", pre_content=get_status_message, children=( @@ -15,5 +15,6 @@ {'title': 'Broken links', 'url': reverse('linkcheck_report')}, {'title': 'Untested links', 'url': reverse('linkcheck_report') + '?filters=show_unchecked'}, {'title': 'Ignored links', 'url': reverse('linkcheck_report') + '?filters=ignored'}, - ) + ), + required_perms=['linkcheck.can_change_link'], ) \ No newline at end of file diff --git a/linkcheck/dashboard_extra_modules.py b/linkcheck/dashboard_extra_modules.py new file mode 100644 index 0000000..2fa54e6 --- /dev/null +++ b/linkcheck/dashboard_extra_modules.py @@ -0,0 +1,16 @@ +from admin_tools.dashboard.modules import LinkList + + +class PermCheckingLinkList(LinkList): + + def __init__(self, title=None, 
**kwargs): + self.required_perms = kwargs.pop('linkcheck.can_change_link', []) + super(PermCheckingLinkList, self).__init__(title, **kwargs) + + def init_with_context(self, context): + super(PermCheckingLinkList, self).init_with_context(context) + if self.required_perms: + if not context['request'].user.has_perms(self.required_perms): + self.children = None + self.pre_content = None + self.post_content = None \ No newline at end of file From 8cd9b295e726730386c273644d95b13990c7c579 Mon Sep 17 00:00:00 2001 From: andybak Date: Tue, 19 Sep 2017 17:07:32 +0100 Subject: [PATCH 020/188] And that's why I should write tests --- linkcheck/dashboard_extra_modules.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linkcheck/dashboard_extra_modules.py b/linkcheck/dashboard_extra_modules.py index 2fa54e6..18ca5b3 100644 --- a/linkcheck/dashboard_extra_modules.py +++ b/linkcheck/dashboard_extra_modules.py @@ -4,7 +4,7 @@ class PermCheckingLinkList(LinkList): def __init__(self, title=None, **kwargs): - self.required_perms = kwargs.pop('linkcheck.can_change_link', []) + self.required_perms = kwargs.pop('required_perms', []) super(PermCheckingLinkList, self).__init__(title, **kwargs) def init_with_context(self, context): From e32e5a41dc1cc549a94bd6a1405122cf22c30a65 Mon Sep 17 00:00:00 2001 From: andybak Date: Wed, 20 Sep 2017 09:11:06 +0100 Subject: [PATCH 021/188] The dashboard stuff was getting too specific to our codebase and doesn't really belong here. I might publish it as a separate project and deprecate this dashboard module. 
--- linkcheck/dashboard.py | 7 +++---- linkcheck/dashboard_extra_modules.py | 16 ---------------- 2 files changed, 3 insertions(+), 20 deletions(-) delete mode 100644 linkcheck/dashboard_extra_modules.py diff --git a/linkcheck/dashboard.py b/linkcheck/dashboard.py index 1a66226..4a7afcd 100644 --- a/linkcheck/dashboard.py +++ b/linkcheck/dashboard.py @@ -1,4 +1,4 @@ -from .dashboard_extra_modules import PermCheckingLinkList +from admin_tools.dashboard import modules from linkcheck.views import get_status_message try: @@ -7,7 +7,7 @@ from django.core.urlresolvers import reverse -linkcheck_dashboard_module = PermCheckingLinkList( +linkcheck_dashboard_module = modules.LinkList( title="Linkchecker", pre_content=get_status_message, children=( @@ -15,6 +15,5 @@ {'title': 'Broken links', 'url': reverse('linkcheck_report')}, {'title': 'Untested links', 'url': reverse('linkcheck_report') + '?filters=show_unchecked'}, {'title': 'Ignored links', 'url': reverse('linkcheck_report') + '?filters=ignored'}, - ), - required_perms=['linkcheck.can_change_link'], + ) ) \ No newline at end of file diff --git a/linkcheck/dashboard_extra_modules.py b/linkcheck/dashboard_extra_modules.py deleted file mode 100644 index 18ca5b3..0000000 --- a/linkcheck/dashboard_extra_modules.py +++ /dev/null @@ -1,16 +0,0 @@ -from admin_tools.dashboard.modules import LinkList - - -class PermCheckingLinkList(LinkList): - - def __init__(self, title=None, **kwargs): - self.required_perms = kwargs.pop('required_perms', []) - super(PermCheckingLinkList, self).__init__(title, **kwargs) - - def init_with_context(self, context): - super(PermCheckingLinkList, self).init_with_context(context) - if self.required_perms: - if not context['request'].user.has_perms(self.required_perms): - self.children = None - self.pre_content = None - self.post_content = None \ No newline at end of file From e215d7caef2a3009494e6da3385a7a41ebf3a0d4 Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Tue, 21 Nov 2017 18:08:23 +0100 
Subject: [PATCH 022/188] timezone.now is available since Django 1.4 --- linkcheck/models.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/linkcheck/models.py b/linkcheck/models.py index ae7edff..d9c0363 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -20,10 +20,7 @@ from django.utils.six.moves import http_client from django.utils.six.moves.urllib.error import HTTPError, URLError from django.utils.six.moves.urllib.request import HTTPRedirectHandler, Request, build_opener -try: - from django.utils.timezone import now -except ImportError: - now = datetime.now +from django.utils.timezone import now try: from reversion.revisions import revision_context_manager From f8ffe41d9a1dba966440a97c31134bff03a9729d Mon Sep 17 00:00:00 2001 From: Chen Zhe Date: Tue, 5 Dec 2017 15:22:43 +0800 Subject: [PATCH 023/188] bug fix: move reversion fix to the bottom of _check_internal() --- linkcheck/models.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/linkcheck/models.py b/linkcheck/models.py index d9c0363..760ff6e 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -237,10 +237,6 @@ def _check_internal(self, tested_url): c = Client() c.handler = LinkCheckHandler() response = c.get(tested_url) - if USE_REVERSION: - # using test client will clear the RevisionContextManager stack. - revision_context_manager.start() - if response.status_code == 200: self.message = 'Working internal link' self.status = True @@ -274,6 +270,10 @@ def _check_internal(self, tested_url): else: self.message = 'Invalid URL' + if USE_REVERSION: + # using test client will clear the RevisionContextManager stack. 
+ revision_context_manager.start() + self.last_checked = now() self.save() From 10a09642f82f0446f37f31f03bd88c8468956477 Mon Sep 17 00:00:00 2001 From: Chen Zhe Date: Wed, 13 Dec 2017 13:15:48 +0800 Subject: [PATCH 024/188] catch unicode decode error --- linkcheck/models.py | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/linkcheck/models.py b/linkcheck/models.py index 760ff6e..e463f4f 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -224,12 +224,16 @@ def _check_internal(self, tested_url): html_content = '' for field in instance._linklist.html_fields: html_content += getattr(instance, field, '') - names = parse_anchors(html_content) - if hash in names: - self.message = 'Working internal hash anchor' - self.status = True - else: - self.message = 'Broken internal hash anchor' + try: + names = parse_anchors(html_content) + if hash in names: + self.message = 'Working internal hash anchor' + self.status = True + else: + self.message = 'Broken internal hash anchor' + except UnicodeDecodeError: + self.message = 'Failed to parse HTML for anchor' + elif tested_url.startswith('/'): old_prepend_setting = settings.PREPEND_WWW @@ -246,13 +250,16 @@ def _check_internal(self, tested_url): elif tested_url.count('#'): anchor = tested_url.split('#')[1] from linkcheck import parse_anchors - names = parse_anchors(response.content) - if anchor in names: - self.message = 'Working internal hash anchor' - self.status = True - else: - self.message = 'Broken internal hash anchor' - self.status = False + try: + names = parse_anchors(response.content) + if anchor in names: + self.message = 'Working internal hash anchor' + self.status = True + else: + self.message = 'Broken internal hash anchor' + self.status = False + except UnicodeDecodeError: + self.message = 'Failed to parse HTML for anchor' elif response.status_code == 302 or response.status_code == 301: redir_response = c.get(tested_url, follow=True) From 
ab9f02d445f27da92e7ef82b4da3ebf882bd57d1 Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Fri, 4 May 2018 17:06:48 +0200 Subject: [PATCH 025/188] Add Django 2.0 in Travis test config --- .travis.yml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index c0353c9..35429bd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,12 +4,16 @@ python: - 3.4 - 3.5 env: - - DJANGO=django==1.8.18 + - DJANGO=django==1.8.19 - DJANGO=django==1.9.13 - - DJANGO=django==1.10.7 - - DJANGO=django==1.11.2 + - DJANGO=django==1.10.8 + - DJANGO=django==1.11.13 + - DJANGO=django==2.0.5 matrix: fast_finish: true + exclude: + - python: 2.7 + env: DJANGO=django==2.0.5 install: - travis_retry pip install $DJANGO - pip install -e . From c7c1594dc8f9468e9db2e9d93c9d13a3b7a0c41d Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Thu, 19 Jul 2018 20:28:55 +0200 Subject: [PATCH 026/188] Fixed HTMLParser import on Django 2.1 --- linkcheck/__init__.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/linkcheck/__init__.py b/linkcheck/__init__.py index 39ec120..1f21b00 100644 --- a/linkcheck/__init__.py +++ b/linkcheck/__init__.py @@ -1,9 +1,12 @@ import threading try: - from django.utils.html_parser import HTMLParser + from django.utils.html_parser import HTMLParser # Gone in Django 2.1 except ImportError: - from HTMLParser import HTMLParser + try: + from HTMLParser import HTMLParser # Python 2 + except ImportError: + from html.parser import HTMLParser # Python 3 # A global lock, showing whether linkcheck is busy update_lock = threading.Lock() From 23b43e7e463ef5f664efdeccb7295f3dca006f7f Mon Sep 17 00:00:00 2001 From: Chen Zhe Date: Thu, 20 Sep 2018 12:01:40 +0800 Subject: [PATCH 027/188] When we get CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:579) error we try the link using requests, and ignore SSL verification error. 
--- linkcheck/models.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/linkcheck/models.py b/linkcheck/models.py index e463f4f..d97a38b 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -6,6 +6,7 @@ from datetime import datetime from datetime import timedelta import logging +import requests from django.conf import settings try: @@ -315,6 +316,16 @@ def _check_external(self, tested_url, external_recheck_interval): req, timeout=LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT ) + except URLError as e: + # When we get CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:579) error + # we try the link using requests, and ignore SSL verification error. + if hasattr(e, 'reason') and 'certificate verify failed' in e.reason: + response = requests.head(url, verify=False, timeout=LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT) + response.code = response.status_code + response.msg = '' + else: + raise + except (ValueError, HTTPError) as error: # ...except sometimes it triggers a bug in urllib2 if hasattr(error, 'code') and error.code == METHOD_NOT_ALLOWED: From 193eebb8b79f765b5347ed4b095d8f28912d762b Mon Sep 17 00:00:00 2001 From: Chen Zhe Date: Thu, 20 Sep 2018 12:12:50 +0800 Subject: [PATCH 028/188] When we get CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:579) error we try the link using requests, and ignore SSL verification error. --- linkcheck/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linkcheck/models.py b/linkcheck/models.py index d97a38b..8eaa3bf 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -319,7 +319,7 @@ def _check_external(self, tested_url, external_recheck_interval): except URLError as e: # When we get CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:579) error # we try the link using requests, and ignore SSL verification error. 
- if hasattr(e, 'reason') and 'certificate verify failed' in e.reason: + if hasattr(e, 'reason') and 'certificate verify failed' in str(e.reason): response = requests.head(url, verify=False, timeout=LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT) response.code = response.status_code response.msg = '' From 31919a8e22a046fe34d94af579d2ce4bdfade7ce Mon Sep 17 00:00:00 2001 From: Chen Zhe Date: Thu, 20 Sep 2018 12:15:50 +0800 Subject: [PATCH 029/188] bug fix --- linkcheck/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linkcheck/models.py b/linkcheck/models.py index 8eaa3bf..43e5ddc 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -378,7 +378,7 @@ def _check_external(self, tested_url, external_recheck_interval): except Exception as e: self.message = 'Other Error: %s' % e else: - if response.getcode() == 301 and response.geturl() != url: + if response.getcode and response.getcode() == 301 and response.geturl() != url: self.redirect_to = response.geturl() elif self.redirect_to: self.redirect_to = '' From 0536f29704948a3aa6004226aea0809591a3cdcb Mon Sep 17 00:00:00 2001 From: Chen Zhe Date: Thu, 20 Sep 2018 12:16:45 +0800 Subject: [PATCH 030/188] bug fix --- linkcheck/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linkcheck/models.py b/linkcheck/models.py index 43e5ddc..f4cf8f6 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -378,7 +378,7 @@ def _check_external(self, tested_url, external_recheck_interval): except Exception as e: self.message = 'Other Error: %s' % e else: - if response.getcode and response.getcode() == 301 and response.geturl() != url: + if getattr(response, 'getcode') and response.getcode() == 301 and response.geturl() != url: self.redirect_to = response.geturl() elif self.redirect_to: self.redirect_to = '' From a829c22242f812bd55b2420ec4649628049513fe Mon Sep 17 00:00:00 2001 From: Chen Zhe Date: Thu, 20 Sep 2018 12:17:19 +0800 Subject: [PATCH 031/188] bug fix --- 
linkcheck/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linkcheck/models.py b/linkcheck/models.py index f4cf8f6..525c195 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -378,7 +378,7 @@ def _check_external(self, tested_url, external_recheck_interval): except Exception as e: self.message = 'Other Error: %s' % e else: - if getattr(response, 'getcode') and response.getcode() == 301 and response.geturl() != url: + if getattr(response, 'getcode', False) and response.getcode() == 301 and response.geturl() != url: self.redirect_to = response.geturl() elif self.redirect_to: self.redirect_to = '' From 0841bdaac8d5577eace7e436fa165fb82f536558 Mon Sep 17 00:00:00 2001 From: Chen Zhe Date: Thu, 10 Jan 2019 11:58:54 +0800 Subject: [PATCH 032/188] add requests to .travis.yml --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 35429bd..fa56344 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,6 +16,7 @@ matrix: env: DJANGO=django==2.0.5 install: - travis_retry pip install $DJANGO + - travis_retry pip install requests - pip install -e . 
script: python runtests.py notifications: From 829b1cab85f6d5a829afd95de5a7dfb2a06805f9 Mon Sep 17 00:00:00 2001 From: Chen Zhe Date: Thu, 10 Jan 2019 13:24:02 +0800 Subject: [PATCH 033/188] django2.1 compatible --- linkcheck/models.py | 3 ++- linkcheck/utils.py | 59 +++++++++++++++++++++++++++++---------------- 2 files changed, 40 insertions(+), 22 deletions(-) diff --git a/linkcheck/models.py b/linkcheck/models.py index 525c195..56859d9 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -151,10 +151,11 @@ def check_url(self, check_internal=True, check_external=True, external_recheck_i tested_url = self.url # May receive transformation before being checked + internal_exceptions = [] if SITE_DOMAINS: # If the setting is present internal_exceptions = SITE_DOMAINS - else: # try using SITE_DOMAIN + elif getattr(settings, 'SITE_DOMAIN', None): # try using SITE_DOMAIN root_domain = settings.SITE_DOMAIN if root_domain.startswith('www.'): root_domain = root_domain[4:] diff --git a/linkcheck/utils.py b/linkcheck/utils.py index 1a87828..8afa81f 100644 --- a/linkcheck/utils.py +++ b/linkcheck/utils.py @@ -17,18 +17,20 @@ def load_middleware(self): self.ignore_keywords = ['reversion.middleware','MaintenanceModeMiddleware'] super(LinkCheckHandler, self).load_middleware() new_request_middleware = [] - + #############################_request_middleware################################# - for method in self._request_middleware: - ignored = False - for keyword in self.ignore_keywords: - if method.__str__().count(keyword): - ignored = True - break - if not ignored: - new_request_middleware.append(method) - self._request_middleware = new_request_middleware - + # _request_middleware is removed in newer django. 
+ if getattr(self, '_request_middleware', None): + for method in self._request_middleware: + ignored = False + for keyword in self.ignore_keywords: + if method.__str__().count(keyword): + ignored = True + break + if not ignored: + new_request_middleware.append(method) + self._request_middleware = new_request_middleware + #############################_view_middleware################################# new_view_middleware = [] for method in self._view_middleware: @@ -42,16 +44,31 @@ def load_middleware(self): self._view_middleware = new_view_middleware #############################_response_middleware################################# - new_response_middleware = [] - for method in self._response_middleware: - ignored = False - for keyword in self.ignore_keywords: - if method.__str__().count(keyword): - ignored = True - break - if not ignored: - new_response_middleware.append(method) - self._response_middleware = new_response_middleware + if getattr(self, '_response_middleware', None): + new_response_middleware = [] + for method in self._response_middleware: + ignored = False + for keyword in self.ignore_keywords: + if method.__str__().count(keyword): + ignored = True + break + if not ignored: + new_response_middleware.append(method) + self._response_middleware = new_response_middleware + + + #############################_template_response_middleware################################# + if getattr(self, '_template_response_middleware', None): + new_template_response_middleware = [] + for method in self._template_response_middleware: + ignored = False + for keyword in self.ignore_keywords: + if method.__str__().count(keyword): + ignored = True + break + if not ignored: + new_template_response_middleware.append(method) + self._template_response_middleware = new_template_response_middleware #############################_exception_middleware################################# new_exception_middleware = [] From 9f6c0d4c1afd604392c9d9904cedeb79ce526713 Mon Sep 17 00:00:00 2001 
From: Claude Paroz Date: Fri, 22 Feb 2019 23:42:34 +0100 Subject: [PATCH 034/188] Add Django 2.1 in travis config (#86) --- .travis.yml | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index fa56344..651dbcc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,17 +3,23 @@ python: - 2.7 - 3.4 - 3.5 + - 3.6 env: - DJANGO=django==1.8.19 - DJANGO=django==1.9.13 - DJANGO=django==1.10.8 - - DJANGO=django==1.11.13 - - DJANGO=django==2.0.5 + - DJANGO=django==1.11.20 + - DJANGO=django==2.0.13 + - DJANGO=django==2.1.7 matrix: fast_finish: true exclude: - python: 2.7 - env: DJANGO=django==2.0.5 + env: DJANGO=django==2.0.13 + - python: 2.7 + env: DJANGO=django==2.1.7 + - python: 3.4 + env: DJANGO=django==2.1.7 install: - travis_retry pip install $DJANGO - travis_retry pip install requests From 37c2aef642b96fa7e20c918819a9770d36efafc8 Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Sat, 16 Mar 2019 12:56:50 +0100 Subject: [PATCH 035/188] Fixes #87 - Write accented file path only for a specific test --- "linkcheck/tests/media/ru\314\210ckmeldung" | 0 "linkcheck/tests/media/r\303\274ckmeldung" | 0 linkcheck/tests/test_linkcheck.py | 3 +++ 3 files changed, 3 insertions(+) delete mode 100644 "linkcheck/tests/media/ru\314\210ckmeldung" delete mode 100644 "linkcheck/tests/media/r\303\274ckmeldung" diff --git "a/linkcheck/tests/media/ru\314\210ckmeldung" "b/linkcheck/tests/media/ru\314\210ckmeldung" deleted file mode 100644 index e69de29..0000000 diff --git "a/linkcheck/tests/media/r\303\274ckmeldung" "b/linkcheck/tests/media/r\303\274ckmeldung" deleted file mode 100644 index e69de29..0000000 diff --git a/linkcheck/tests/test_linkcheck.py b/linkcheck/tests/test_linkcheck.py index d18f917..a02cadc 100644 --- a/linkcheck/tests/test_linkcheck.py +++ b/linkcheck/tests/test_linkcheck.py @@ -150,6 +150,9 @@ def test_internal_check_media_found(self): self.assertEqual(uv.message, 'Working file link') def 
test_internal_check_media_utf8(self): + media_file = os.path.join(os.path.dirname(__file__), 'media', 'rückmeldung') + open(media_file, 'a').close() + self.addCleanup(os.remove, media_file) uv = Url(url="/media/r%C3%BCckmeldung", still_exists=True) uv.check_url() self.assertEqual(uv.status, True) From 5830bce314896df2684ae258608abac48bc37943 Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Wed, 20 Mar 2019 18:49:40 +0100 Subject: [PATCH 036/188] Updated CHANGELOG for the 1.6 release Refs #89. --- CHANGELOG | 6 ++++++ setup.py | 4 +++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index 5be9778..2401fec 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,9 @@ +1.6 (2019-03-20) + +* Use requests library when getting 'certificate verify failed' errors. +* Fixed compatibility issues with newer versions of Django. +* Fixed pip installation issues with encoding errors (#87). + 1.5 (2017-09-16) * Added support for `tel:` links. diff --git a/setup.py b/setup.py index bc57e5d..431b199 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ def read(fname): setup( name='django-linkcheck', - version='1.5', + version='1.6', description="A Django app that will analyze and report on links in any " "model that you register with it.", long_description=read('README.rst'), @@ -40,6 +40,8 @@ def read(fname): 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', 'Framework :: Django', ], ) From 31da991726b62011367b95cf46a9fda280539802 Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Wed, 20 Mar 2019 20:13:04 +0100 Subject: [PATCH 037/188] Add missing requests dep in setup.py --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 431b199..49dd356 100644 --- a/setup.py +++ b/setup.py @@ -29,6 +29,7 @@ def read(fname): 'tests/media/*', ] }, + 
install_requires=['requests'], classifiers=[ 'Development Status :: 5 - Production/Stable', 'Environment :: Web Environment', From aa71a2052e3a75f94e4716b81d27e40e5790f2fb Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Wed, 20 Mar 2019 20:31:55 +0100 Subject: [PATCH 038/188] Fallback LINKCHECK_MEDIA_PREFIX settings to MEDIA_URL --- linkcheck/linkcheck_settings.py | 2 +- runtests.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/linkcheck/linkcheck_settings.py b/linkcheck/linkcheck_settings.py index 9f1bde5..667590c 100644 --- a/linkcheck/linkcheck_settings.py +++ b/linkcheck/linkcheck_settings.py @@ -54,7 +54,7 @@ LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT = getattr(settings, 'LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT', 10) MAX_CHECKS_PER_RUN = getattr(settings, 'LINKCHECK_MAX_CHECKS_PER_RUN', -1) MAX_URL_LENGTH = getattr(settings, 'LINKCHECK_MAX_URL_LENGTH', 255) -MEDIA_PREFIX = getattr(settings, 'LINKCHECK_MEDIA_PREFIX', '/media/') +MEDIA_PREFIX = getattr(settings, 'LINKCHECK_MEDIA_PREFIX', settings.MEDIA_URL) RESULTS_PER_PAGE = getattr(settings, 'LINKCHECK_RESULTS_PER_PAGE', 500) SITE_DOMAINS = getattr(settings, 'LINKCHECK_SITE_DOMAINS', []) DISABLE_LISTENERS = getattr(settings, 'LINKCHECK_DISABLE_LISTENERS', False) diff --git a/runtests.py b/runtests.py index d23c9bc..555a82a 100644 --- a/runtests.py +++ b/runtests.py @@ -10,6 +10,7 @@ test_settings = { 'DATABASES': {'default': {'ENGINE': 'django.db.backends.sqlite3'}}, 'STATIC_URL': '/static/', + 'MEDIA_URL': '/media/', 'INSTALLED_APPS': [ 'django.contrib.admin', 'django.contrib.auth', 'django.contrib.sessions', 'django.contrib.contenttypes', From da65582a77714d7ba9e9a1885a6e30c230fe9ad1 Mon Sep 17 00:00:00 2001 From: David Vogt Date: Mon, 25 Mar 2019 10:04:12 +0100 Subject: [PATCH 039/188] Fix timezone warnings When timezone-aware dates are used, this would lead to a ton of warnings. In other places of the project, it was already fixed, but not this one. 
--- linkcheck/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/linkcheck/utils.py b/linkcheck/utils.py index 8afa81f..6240815 100644 --- a/linkcheck/utils.py +++ b/linkcheck/utils.py @@ -1,8 +1,8 @@ from django.apps import apps from django.db import models from django.test.client import ClientHandler +from django.utils import timezone -from datetime import datetime from datetime import timedelta from .models import Link, Url @@ -92,7 +92,7 @@ def check_links(external_recheck_interval=10080, limit=-1, check_internal=True, # An optimization for when check_internal is False if not check_internal: - recheck_datetime = datetime.now() - timedelta(minutes=external_recheck_interval) + recheck_datetime = timezone.now() - timedelta(minutes=external_recheck_interval) urls = urls.exclude(last_checked__gt=recheck_datetime) check_count = 0 From 2b045d24e7e19f6e247b42ba885fe2f3dda6190f Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Thu, 18 Apr 2019 09:41:56 +0200 Subject: [PATCH 040/188] Fixes #90 - Dropped support for Django < 1.11 --- .travis.yml | 3 --- CHANGELOG | 4 ++++ README.rst | 2 +- linkcheck/dashboard.py | 5 +---- linkcheck/models.py | 5 +---- linkcheck/tests/test_linkcheck.py | 6 +----- linkcheck/views.py | 14 ++++---------- runtests.py | 2 -- 8 files changed, 12 insertions(+), 29 deletions(-) diff --git a/.travis.yml b/.travis.yml index 651dbcc..2922236 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,9 +5,6 @@ python: - 3.5 - 3.6 env: - - DJANGO=django==1.8.19 - - DJANGO=django==1.9.13 - - DJANGO=django==1.10.8 - DJANGO=django==1.11.20 - DJANGO=django==2.0.13 - DJANGO=django==2.1.7 diff --git a/CHANGELOG b/CHANGELOG index 2401fec..94d977d 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,7 @@ +1.7 (unreleased) + +* Dropped support for Django < 1.11 + 1.6 (2019-03-20) * Use requests library when getting 'certificate verify failed' errors. 
diff --git a/README.rst b/README.rst index dd97cd8..864d3e6 100644 --- a/README.rst +++ b/README.rst @@ -22,7 +22,7 @@ automatically when objects are saved. This is handled by signals. Minimal requirements -------------------- -django-linkchecks requires Python 2.7 and Django 1.8. It is Python 3 compatible. +django-linkchecks requires Python 2.7 and Django 1.11. It is Python 3 compatible. Basic usage ----------- diff --git a/linkcheck/dashboard.py b/linkcheck/dashboard.py index 4a7afcd..b733c5d 100644 --- a/linkcheck/dashboard.py +++ b/linkcheck/dashboard.py @@ -1,10 +1,7 @@ from admin_tools.dashboard import modules from linkcheck.views import get_status_message -try: - from django.urls import reverse -except ImportError: # Django < 1.10 - from django.core.urlresolvers import reverse +from django.urls import reverse linkcheck_dashboard_module = modules.LinkList( diff --git a/linkcheck/models.py b/linkcheck/models.py index 56859d9..a28ca33 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -9,10 +9,7 @@ import requests from django.conf import settings -try: - from django.contrib.contenttypes.fields import GenericForeignKey -except ImportError: - from django.contrib.contenttypes.generic import GenericForeignKey +from django.contrib.contenttypes.fields import GenericForeignKey from django.contrib.contenttypes.models import ContentType from django.db import models from django.test.client import Client diff --git a/linkcheck/tests/test_linkcheck.py b/linkcheck/tests/test_linkcheck.py index a02cadc..50eb7d6 100644 --- a/linkcheck/tests/test_linkcheck.py +++ b/linkcheck/tests/test_linkcheck.py @@ -12,15 +12,11 @@ from django.core.management import call_command from django.test import LiveServerTestCase, TestCase from django.test.utils import override_settings +from django.urls import reverse from django.utils.six import StringIO from django.utils.six.moves.urllib import request from django.utils.six.moves.urllib.error import HTTPError -try: - from django.urls 
import reverse -except ImportError: # Django < 1.10 - from django.core.urlresolvers import reverse - from linkcheck.models import Link, Url from linkcheck.views import get_jquery_min_js diff --git a/linkcheck/views.py b/linkcheck/views.py index a83f270..b3f1e71 100644 --- a/linkcheck/views.py +++ b/linkcheck/views.py @@ -12,13 +12,9 @@ from django.core.paginator import Paginator from django.http import HttpResponse from django.shortcuts import render +from django.urls import reverse, NoReverseMatch from django.views.decorators.csrf import csrf_exempt -try: - from django.urls import reverse, NoReverseMatch -except ImportError: # Django < 1.10 - from django.core.urlresolvers import reverse, NoReverseMatch - from linkcheck import update_lock from linkcheck.linkcheck_settings import RESULTS_PER_PAGE from linkcheck.models import Link @@ -163,12 +159,10 @@ def report(request): def get_jquery_min_js(): """ - Return the location of jquery.min.js. It's in different places in - different versions of Django. + Return the location of jquery.min.js. It's an entry point to adapt the path + when it changes in Django. 
""" - jquery_min_js = ('admin/js/jquery.min.js' if django.VERSION < (1, 10) - else 'admin/js/vendor/jquery/jquery.min.js') - return jquery_min_js + return 'admin/js/vendor/jquery/jquery.min.js' def get_status_message(): diff --git a/runtests.py b/runtests.py index 555a82a..5f8407f 100644 --- a/runtests.py +++ b/runtests.py @@ -35,8 +35,6 @@ }, }], } - # Django < 1.10 compatibility - test_settings['MIDDLEWARE_CLASSES'] = test_settings['MIDDLEWARE'] settings.configure(**test_settings) From ad3373cdc3ae8d92bf90b3b906c0849abb450ff3 Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Thu, 18 Apr 2019 10:05:41 +0200 Subject: [PATCH 041/188] Added the messages framework to test settings --- runtests.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/runtests.py b/runtests.py index 5f8407f..fe76edf 100644 --- a/runtests.py +++ b/runtests.py @@ -14,12 +14,14 @@ 'INSTALLED_APPS': [ 'django.contrib.admin', 'django.contrib.auth', 'django.contrib.sessions', 'django.contrib.contenttypes', + 'django.contrib.messages', 'linkcheck', 'linkcheck.tests.sampleapp', ], 'ROOT_URLCONF': "linkcheck.tests.urls", 'SITE_DOMAIN': "localhost", 'MIDDLEWARE': [ 'django.contrib.sessions.middleware.SessionMiddleware', + 'django.contrib.messages.middleware.MessageMiddleware', 'django.middleware.common.CommonMiddleware', 'django.middleware.csrf.CsrfViewMiddleware', 'django.contrib.auth.middleware.AuthenticationMiddleware', @@ -30,6 +32,7 @@ 'OPTIONS': { 'context_processors': [ 'django.contrib.auth.context_processors.auth', + 'django.contrib.messages.context_processors.messages', 'django.template.context_processors.static', ], }, From af53e12c05bb041fc2a47e1f42e060ce4c196418 Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Thu, 18 Apr 2019 10:06:41 +0200 Subject: [PATCH 042/188] Confirm and test support for Django 2.2 --- .travis.yml | 6 ++++++ linkcheck/tests/urls.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 2922236..bd23e76 100644 --- 
a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,5 @@ language: python +dist: xenial python: - 2.7 - 3.4 @@ -8,6 +9,7 @@ env: - DJANGO=django==1.11.20 - DJANGO=django==2.0.13 - DJANGO=django==2.1.7 + - DJANGO=django==2.2 matrix: fast_finish: true exclude: @@ -17,6 +19,10 @@ matrix: env: DJANGO=django==2.1.7 - python: 3.4 env: DJANGO=django==2.1.7 + - python: 2.7 + env: DJANGO=django==2.2 + - python: 3.4 + env: DJANGO=django==2.2 install: - travis_retry pip install $DJANGO - travis_retry pip install requests diff --git a/linkcheck/tests/urls.py b/linkcheck/tests/urls.py index 672068b..6ff44ee 100644 --- a/linkcheck/tests/urls.py +++ b/linkcheck/tests/urls.py @@ -8,7 +8,7 @@ from linkcheck.tests.sampleapp import views -handler404 = lambda x, **kwargs: http.HttpResponseNotFound('') +handler404 = lambda *args, **kwargs: http.HttpResponseNotFound('') urlpatterns = [ url(http://github.com/r'%5Eadmin/linkcheck/',%20include('linkcheck.urls')), From e8b0ae5a3588db989893290b1c510bf01551a8cc Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Thu, 29 Aug 2019 11:23:34 +0200 Subject: [PATCH 043/188] Use requests package instead of custom urllib requests --- .travis.yml | 14 ++-- CHANGELOG | 1 + linkcheck/models.py | 112 ++++++++--------------------- linkcheck/tests/sampleapp/views.py | 17 ++++- linkcheck/tests/test_linkcheck.py | 37 ++++++++-- linkcheck/tests/urls.py | 3 + runtests.py | 1 + 7 files changed, 88 insertions(+), 97 deletions(-) diff --git a/.travis.yml b/.travis.yml index bd23e76..4b6f3c8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,23 +6,23 @@ python: - 3.5 - 3.6 env: - - DJANGO=django==1.11.20 + - DJANGO=django==1.11.23 - DJANGO=django==2.0.13 - - DJANGO=django==2.1.7 - - DJANGO=django==2.2 + - DJANGO=django==2.1.11 + - DJANGO=django==2.2.4 matrix: fast_finish: true exclude: - python: 2.7 env: DJANGO=django==2.0.13 - python: 2.7 - env: DJANGO=django==2.1.7 + env: DJANGO=django==2.1.11 - python: 3.4 - env: DJANGO=django==2.1.7 + env: DJANGO=django==2.1.11 - 
python: 2.7 - env: DJANGO=django==2.2 + env: DJANGO=django==2.2.4 - python: 3.4 - env: DJANGO=django==2.2 + env: DJANGO=django==2.2.4 install: - travis_retry pip install $DJANGO - travis_retry pip install requests diff --git a/CHANGELOG b/CHANGELOG index 94d977d..0c48aaf 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,7 @@ 1.7 (unreleased) * Dropped support for Django < 1.11 +* Made more usage of the requests library. 1.6 (2019-03-20) diff --git a/linkcheck/models.py b/linkcheck/models.py index a28ca33..e07d3e7 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -3,10 +3,11 @@ import re import os.path -from datetime import datetime from datetime import timedelta import logging import requests +from requests.exceptions import ReadTimeout +from requests.models import REDIRECT_STATI from django.conf import settings from django.contrib.contenttypes.fields import GenericForeignKey @@ -15,9 +16,6 @@ from django.test.client import Client from django.utils.encoding import iri_to_uri, python_2_unicode_compatible from django.utils.http import urlunquote -from django.utils.six.moves import http_client -from django.utils.six.moves.urllib.error import HTTPError, URLError -from django.utils.six.moves.urllib.request import HTTPRedirectHandler, Request, build_opener from django.utils.timezone import now try: @@ -39,26 +37,6 @@ EXTERNAL_REGEX = re.compile(EXTERNAL_REGEX_STRING) -METHOD_NOT_ALLOWED = 405 - - -class HeadRequest(Request): - def get_method(self): - return "HEAD" - - -class GetRequest(Request): - def get_method(self): - return "GET" - - -class RedirectHandler(HTTPRedirectHandler): - """With this custom handler, we'll be able to identify 301 redirections""" - def http_error_301(self, req, fp, code, *args): - result = HTTPRedirectHandler.http_error_301(self, req, fp, code, *args) - if result: - result.code = result.status = code - return result def html_decode(s): @@ -290,7 +268,6 @@ def _check_external(self, tested_url, external_recheck_interval): if 
self.last_checked and (self.last_checked > external_recheck_datetime): return self.status - opener = build_opener(RedirectHandler) # Remove URL fragment identifiers url = tested_url.rsplit('#')[0] # Check that non-ascii chars are properly encoded @@ -299,51 +276,37 @@ def _check_external(self, tested_url, external_recheck_interval): except UnicodeEncodeError: url = iri_to_uri(url) + request_params = { + 'verify': False, 'allow_redirects': True, + 'headers': {'User-Agent' : "http://%s Linkchecker" % settings.SITE_DOMAIN}, + 'timeout': LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT, + } try: if tested_url.count('#'): # We have to get the content so we can check the anchors - response = opener.open( - url, - timeout=LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT - ) + response = requests.get(url, **request_params) else: # Might as well just do a HEAD request - req = HeadRequest(url, headers={'User-Agent' : "http://%s Linkchecker" % settings.SITE_DOMAIN}) - try: - response = opener.open( - req, - timeout=LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT - ) - except URLError as e: - # When we get CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:579) error - # we try the link using requests, and ignore SSL verification error. 
- if hasattr(e, 'reason') and 'certificate verify failed' in str(e.reason): - response = requests.head(url, verify=False, timeout=LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT) - response.code = response.status_code - response.msg = '' - else: - raise - - except (ValueError, HTTPError) as error: - # ...except sometimes it triggers a bug in urllib2 - if hasattr(error, 'code') and error.code == METHOD_NOT_ALLOWED: - req = GetRequest(url, headers={'User-Agent' : "http://%s Linkchecker" % settings.SITE_DOMAIN}) - else: - req = url - response = opener.open( - req, - timeout=LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT - ) - - self.message = ' '.join([str(response.code), response.msg]) - self.status = True + response = requests.head(url, **request_params) + + if response.status_code >= 400: + # If HEAD is not allowed, let's try with GET + response = requests.get(url, **request_params) + except ReadTimeout: + self.message = 'Other Error: The read operation timed out' + self.status = False + except Exception as e: + self.message = 'Other Error: %s' % e + self.status = False + else: + self.message = ' '.join([str(response.status_code), response.reason]) + self.status = 200 <= response.status_code < 400 if tested_url.count('#'): - anchor = tested_url.split('#')[1] from linkcheck import parse_anchors try: - names = parse_anchors(response.read()) + names = parse_anchors(response.text) if anchor in names: self.message = 'Working external hash anchor' self.status = True @@ -357,29 +320,12 @@ def _check_external(self, tested_url, external_recheck_interval): self.message = "Page OK but anchor can't be checked" self.status = True - except http_client.BadStatusLine: - self.message = "Bad Status Line" - - except HTTPError as e: - if hasattr(e, 'code') and hasattr(e, 'msg'): - self.message = ' '.join([str(e.code), e.msg]) - else: - self.message = "Unknown Error" - - except URLError as e: - if hasattr(e, 'reason'): - self.message = 'Unreachable: '+str(e.reason) - elif hasattr(e, 'code') and 
e.code!=301: - self.message = 'Error: '+str(e.code) - else: - self.message = 'Redirect. Check manually: '+str(e.code) - except Exception as e: - self.message = 'Other Error: %s' % e - else: - if getattr(response, 'getcode', False) and response.getcode() == 301 and response.geturl() != url: - self.redirect_to = response.geturl() - elif self.redirect_to: - self.redirect_to = '' + if response.status_code in REDIRECT_STATI: + # This means it could not follow the redirection + self.status = False + elif response.status_code < 300 and response.history: + self.message = ' '.join([str(response.history[0].status_code), response.history[0].reason]) + self.redirect_to = response.url self.last_checked = now() self.save() diff --git a/linkcheck/tests/sampleapp/views.py b/linkcheck/tests/sampleapp/views.py index fc7ecad..60821a9 100644 --- a/linkcheck/tests/sampleapp/views.py +++ b/linkcheck/tests/sampleapp/views.py @@ -1,9 +1,24 @@ -from django.http import HttpResponse, HttpResponseRedirect +import time +from django.http import HttpResponse, HttpResponsePermanentRedirect, HttpResponseRedirect def http_response(request, code): return HttpResponse("", status=int(code)) +def http_response_get_only(request, code): + status = int(code) if request.method == 'HEAD' else 200 + return HttpResponse("", status=status) + + def http_redirect(request, code): return HttpResponseRedirect("/http/200/", status=int(code)) + + +def http_redirect_to_404(request): + return HttpResponsePermanentRedirect("/http/404/") + + +def timeout(request): + time.sleep(2) + return HttpResponse("") diff --git a/linkcheck/tests/test_linkcheck.py b/linkcheck/tests/test_linkcheck.py index 50eb7d6..c634677 100644 --- a/linkcheck/tests/test_linkcheck.py +++ b/linkcheck/tests/test_linkcheck.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from datetime import datetime, timedelta +from unittest import skipIf import os import re @@ -160,6 +161,8 @@ def test_internal_check_media_utf8(self): 
self.assertEqual(uv.message, 'Working file link') +# See https://code.djangoproject.com/ticket/29849 (fixed in Django 2.1+) +@skipIf(django.VERSION[:2]==(2, 0), 'LiveServerTestCase is broken on Django 2.0.x') @override_settings(SITE_DOMAIN='example.com') class ExternalCheckTestCase(LiveServerTestCase): def test_external_check_200(self): @@ -185,23 +188,21 @@ def test_external_check_301(self): uv.check_url() self.assertEqual(uv.status, False) self.assertEqual(uv.message.lower(), '301 moved permanently') + self.assertEqual(uv.redirect_to, '') def test_external_check_301_followed(self): uv = Url(url="%s/http/redirect/301/" % self.live_server_url, still_exists=True) uv.check_url() self.assertEqual(uv.status, True) - self.assertEqual(uv.message, '301 OK') + self.assertEqual(uv.message, '301 Moved Permanently') self.assertEqual(uv.redirect_to, '%s/http/200/' % self.live_server_url) def test_external_check_302_followed(self): - """ - For temporary redirects, we do not report any redirection in `redirect_to`. - """ uv = Url(url="%s/http/redirect/302/" % self.live_server_url, still_exists=True) uv.check_url() self.assertEqual(uv.status, True) - self.assertEqual(uv.message, '200 OK') - self.assertEqual(uv.redirect_to, '') + self.assertEqual(uv.message, '302 Found') + self.assertEqual(uv.redirect_to, '%s/http/200/' % self.live_server_url) def test_external_check_404(self): uv = Url(url="%s/whatever/" % self.live_server_url, still_exists=True) @@ -209,6 +210,30 @@ def test_external_check_404(self): self.assertEqual(uv.status, False) self.assertEqual(uv.message.lower(), '404 not found') + def test_external_check_redirect_final_404(self): + uv = Url(url="%s/http/redirect_to_404/" % self.live_server_url, still_exists=True) + uv.check_url() + self.assertEqual(uv.status, False) + self.assertEqual(uv.message.lower(), '404 not found') + + def test_external_check_get_only(self): + # An URL that allows GET but not HEAD, linkcheck should fallback on GET. 
+ uv = Url(url="%s/http/getonly/405/" % self.live_server_url, still_exists=True) + uv.check_url() + self.assertEqual(uv.status, True) + self.assertEqual(uv.message, '200 OK') + # Same test with other 40x error + uv = Url(url="%s/http/getonly/400/" % self.live_server_url, still_exists=True) + uv.check_url() + self.assertEqual(uv.status, True) + self.assertEqual(uv.message, '200 OK') + + def test_external_check_timedout(self): + uv = Url(url="%s/timeout/" % self.live_server_url, still_exists=True) + uv.check_url() + self.assertEqual(uv.status, False) + self.assertEqual(uv.message, 'Other Error: The read operation timed out') + class ChecklinksTestCase(TestCase): def setUp(self): diff --git a/linkcheck/tests/urls.py b/linkcheck/tests/urls.py index 6ff44ee..e04b39f 100644 --- a/linkcheck/tests/urls.py +++ b/linkcheck/tests/urls.py @@ -16,6 +16,9 @@ url(http://github.com/r'%5Epublic/',%20views.http_response,%20%7B'code':%20'200'%7D), url(http://github.com/r'%5Ehttp/(?P%3Ccode%3E\d+)/$', views.http_response), url(http://github.com/r'%5Ehttp/(?P%3Ccode%3E\d+)/rückmeldung/$', views.http_response), + url(http://github.com/r'%5Ehttp/getonly/(?P%3Ccode%3E\d+)/$', views.http_response_get_only), url(http://github.com/r'%5Ehttp/redirect/(?P%3Ccode%3E\d+)/$', views.http_redirect), + url(http://github.com/r'%5Ehttp/redirect_to_404/),%20views.http_redirect_to_404), url(http://github.com/r'%5Ehttp/brokenredirect/'),%20RedirectView.as_view(url='/non-existent/')), + url(http://github.com/r'%5Etimeout/),%20views.timeout), ] diff --git a/runtests.py b/runtests.py index fe76edf..f33872e 100644 --- a/runtests.py +++ b/runtests.py @@ -37,6 +37,7 @@ ], }, }], + 'LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT': 1, } settings.configure(**test_settings) From 68becff862ea4f8393fa12e169f570904cea4cea Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Wed, 25 Dec 2019 18:36:39 +0100 Subject: [PATCH 044/188] Drop support for Python 2 --- 
.travis.yml | 9 +-------- CHANGELOG | 2 +- README.rst | 2 +- linkcheck/__init__.py | 2 +- linkcheck/apps.py | 2 +- linkcheck/models.py | 9 +++------ linkcheck/tests/test_linkcheck.py | 9 +++------ linkcheck/tests/urls.py | 3 --- linkcheck/utils.py | 2 +- linkcheck/views.py | 2 +- setup.py | 4 ++-- 11 files changed, 15 insertions(+), 31 deletions(-) diff --git a/.travis.yml b/.travis.yml index 4b6f3c8..567ef6b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,10 +1,9 @@ language: python -dist: xenial python: - - 2.7 - 3.4 - 3.5 - 3.6 + - 3.7 env: - DJANGO=django==1.11.23 - DJANGO=django==2.0.13 @@ -13,14 +12,8 @@ env: matrix: fast_finish: true exclude: - - python: 2.7 - env: DJANGO=django==2.0.13 - - python: 2.7 - env: DJANGO=django==2.1.11 - python: 3.4 env: DJANGO=django==2.1.11 - - python: 2.7 - env: DJANGO=django==2.2.4 - python: 3.4 env: DJANGO=django==2.2.4 install: diff --git a/CHANGELOG b/CHANGELOG index 0c48aaf..f8026bb 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,6 @@ 1.7 (unreleased) -* Dropped support for Django < 1.11 +* Dropped support for Python 2 and Django < 1.11 * Made more usage of the requests library. 1.6 (2019-03-20) diff --git a/README.rst b/README.rst index 864d3e6..7c02e00 100644 --- a/README.rst +++ b/README.rst @@ -22,7 +22,7 @@ automatically when objects are saved. This is handled by signals. Minimal requirements -------------------- -django-linkchecks requires Python 2.7 and Django 1.11. It is Python 3 compatible. +django-linkchecks requires Python 3 and Django 1.11. 
Basic usage ----------- diff --git a/linkcheck/__init__.py b/linkcheck/__init__.py index 1f21b00..08b42e3 100644 --- a/linkcheck/__init__.py +++ b/linkcheck/__init__.py @@ -107,7 +107,7 @@ def parse_anchors(content): return parser.names -class Linklist(object): +class Linklist: html_fields = [] url_fields = [] diff --git a/linkcheck/apps.py b/linkcheck/apps.py index 1629948..87c8027 100644 --- a/linkcheck/apps.py +++ b/linkcheck/apps.py @@ -47,7 +47,7 @@ def build_linklists(self): class LinkcheckConfig(BaseLinkcheckConfig): def ready(self): from .linkcheck_settings import DISABLE_LISTENERS - super(LinkcheckConfig, self).ready() + super().ready() if not DISABLE_LISTENERS: # This import will register listeners diff --git a/linkcheck/models.py b/linkcheck/models.py index e07d3e7..5b0c758 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re import os.path @@ -8,14 +6,14 @@ import requests from requests.exceptions import ReadTimeout from requests.models import REDIRECT_STATI +from urllib.parse import unquote from django.conf import settings from django.contrib.contenttypes.fields import GenericForeignKey from django.contrib.contenttypes.models import ContentType from django.db import models from django.test.client import Client -from django.utils.encoding import iri_to_uri, python_2_unicode_compatible -from django.utils.http import urlunquote +from django.utils.encoding import iri_to_uri from django.utils.timezone import now try: @@ -56,7 +54,6 @@ def html_decode(s): return s -@python_2_unicode_compatible class Url(models.Model): """ Represents a distinct URL found somewhere in the models registered with linkcheck @@ -179,7 +176,7 @@ def _check_internal(self, tested_url): elif tested_url.startswith(MEDIA_PREFIX): # TODO Assumes a direct mapping from media url to local filesystem path. 
This will break quite easily for alternate setups - path = settings.MEDIA_ROOT + urlunquote(tested_url)[len(MEDIA_PREFIX)-1:] + path = settings.MEDIA_ROOT + unquote(tested_url)[len(MEDIA_PREFIX)-1:] decoded_path = html_decode(path) if os.path.exists(path) or os.path.exists(decoded_path): self.message = 'Working file link' diff --git a/linkcheck/tests/test_linkcheck.py b/linkcheck/tests/test_linkcheck.py index c634677..3f68fe5 100644 --- a/linkcheck/tests/test_linkcheck.py +++ b/linkcheck/tests/test_linkcheck.py @@ -1,8 +1,8 @@ -# -*- coding: utf-8 -*- -from __future__ import unicode_literals - from datetime import datetime, timedelta +from io import StringIO from unittest import skipIf +from urllib import request +from urllib.error import HTTPError import os import re @@ -14,9 +14,6 @@ from django.test import LiveServerTestCase, TestCase from django.test.utils import override_settings from django.urls import reverse -from django.utils.six import StringIO -from django.utils.six.moves.urllib import request -from django.utils.six.moves.urllib.error import HTTPError from linkcheck.models import Link, Url from linkcheck.views import get_jquery_min_js diff --git a/linkcheck/tests/urls.py b/linkcheck/tests/urls.py index e04b39f..64e0cc1 100644 --- a/linkcheck/tests/urls.py +++ b/linkcheck/tests/urls.py @@ -1,6 +1,3 @@ -# -*- encoding: utf-8 -*- -from __future__ import unicode_literals - from django.conf.urls import include, url from django.contrib import admin from django import http diff --git a/linkcheck/utils.py b/linkcheck/utils.py index 6240815..f3b2711 100644 --- a/linkcheck/utils.py +++ b/linkcheck/utils.py @@ -15,7 +15,7 @@ class LinkCheckHandler(ClientHandler): def load_middleware(self): self.ignore_keywords = ['reversion.middleware','MaintenanceModeMiddleware'] - super(LinkCheckHandler, self).load_middleware() + super().load_middleware() new_request_middleware = [] #############################_request_middleware################################# diff --git 
a/linkcheck/views.py b/linkcheck/views.py index b3f1e71..5408988 100644 --- a/linkcheck/views.py +++ b/linkcheck/views.py @@ -5,13 +5,13 @@ import django from django import forms from django.conf import settings -from django.contrib.admin.templatetags.admin_static import static from django.contrib.admin.views.decorators import staff_member_required from django.contrib.contenttypes.models import ContentType from django.core.exceptions import ObjectDoesNotExist from django.core.paginator import Paginator from django.http import HttpResponse from django.shortcuts import render +from django.templatetags.static import static from django.urls import reverse, NoReverseMatch from django.views.decorators.csrf import csrf_exempt diff --git a/setup.py b/setup.py index 49dd356..1dee704 100644 --- a/setup.py +++ b/setup.py @@ -37,12 +37,12 @@ def read(fname): 'License :: OSI Approved :: BSD License', 'Operating System :: OS Independent', 'Programming Language :: Python', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3 :: Only', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', 'Framework :: Django', ], ) From adf92049126ee54d3330eeb7ef0a5f9c1c9005c3 Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Sat, 11 Jan 2020 11:04:09 +0100 Subject: [PATCH 045/188] Added tests for Django 3.0.x --- .travis.yml | 15 ++++++++++----- CHANGELOG | 3 ++- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 567ef6b..2e28230 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,17 +5,22 @@ python: - 3.6 - 3.7 env: - - DJANGO=django==1.11.23 + - DJANGO=django==1.11.27 - DJANGO=django==2.0.13 - - DJANGO=django==2.1.11 - - DJANGO=django==2.2.4 + - DJANGO=django==2.1.15 + - DJANGO=django==2.2.9 + - DJANGO=django==3.0.2 matrix: fast_finish: 
true exclude: - python: 3.4 - env: DJANGO=django==2.1.11 + env: DJANGO=django==2.1.15 - python: 3.4 - env: DJANGO=django==2.2.4 + env: DJANGO=django==2.2.9 + - python: 3.4 + env: DJANGO=django==3.0.2 + - python: 3.5 + env: DJANGO=django==3.0.2 install: - travis_retry pip install $DJANGO - travis_retry pip install requests diff --git a/CHANGELOG b/CHANGELOG index f8026bb..801e5f3 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,7 @@ 1.7 (unreleased) -* Dropped support for Python 2 and Django < 1.11 +* Dropped support for Python 2 and Django < 1.11. +* Added support for Django 3.0. * Made more usage of the requests library. 1.6 (2019-03-20) From 27ba10cb989ef6c50598a124fe1805d5f7edfc6f Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Sat, 11 Jan 2020 11:20:27 +0100 Subject: [PATCH 046/188] Bumped version to 1.7 --- CHANGELOG | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 801e5f3..f193887 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,4 +1,4 @@ -1.7 (unreleased) +1.7 (2020-01-13) * Dropped support for Python 2 and Django < 1.11. * Added support for Django 3.0. 
diff --git a/setup.py b/setup.py index 1dee704..5250430 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ def read(fname): setup( name='django-linkcheck', - version='1.6', + version='1.7', description="A Django app that will analyze and report on links in any " "model that you register with it.", long_description=read('README.rst'), From 1f285396610cd617cbec48746f40b754a5aa732a Mon Sep 17 00:00:00 2001 From: Chen Zhe Date: Sat, 11 Jul 2020 10:23:16 +0800 Subject: [PATCH 047/188] make sure we pass str to parser.feed --- linkcheck/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/linkcheck/__init__.py b/linkcheck/__init__.py index 08b42e3..d4eacc0 100644 --- a/linkcheck/__init__.py +++ b/linkcheck/__init__.py @@ -102,6 +102,8 @@ def parse_images(obj, field): def parse_anchors(content): parser = AnchorLister() + if not isinstance(content, str): + content = str(content) parser.feed(content) parser.close() return parser.names From a75cd57502a9fb1eac5e0a49c23f19a69a3ad4cd Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Sat, 3 Oct 2020 16:13:59 +0200 Subject: [PATCH 048/188] Add tests for Django 3.1 --- .travis.yml | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/.travis.yml b/.travis.yml index 2e28230..88400c5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,23 +4,29 @@ python: - 3.5 - 3.6 - 3.7 + - 3.8 env: - - DJANGO=django==1.11.27 - - DJANGO=django==2.0.13 - - DJANGO=django==2.1.15 - - DJANGO=django==2.2.9 - - DJANGO=django==3.0.2 + - DJANGO="Django==1.11.*" + - DJANGO="Django==2.0.*" + - DJANGO="Django==2.1.*" + - DJANGO="Django==2.2.*" + - DJANGO="Django==3.0.*" + - DJANGO="Django==3.1.*" matrix: fast_finish: true exclude: - python: 3.4 - env: DJANGO=django==2.1.15 + env: DJANGO="Django==2.1.*" - python: 3.4 - env: DJANGO=django==2.2.9 + env: DJANGO="Django==2.2.*" - python: 3.4 - env: DJANGO=django==3.0.2 + env: DJANGO="Django==3.0.*" - python: 3.5 - env: DJANGO=django==3.0.2 + env: 
DJANGO="Django==3.0.*" + - python: 3.4 + env: DJANGO="Django==3.1.*" + - python: 3.5 + env: DJANGO="Django==3.1.*" install: - travis_retry pip install $DJANGO - travis_retry pip install requests From e7af769f9d5c147cbc906489b9567243520b5115 Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Wed, 11 Nov 2020 10:54:51 +0100 Subject: [PATCH 049/188] Add SECRET_KEY to default test settings --- runtests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/runtests.py b/runtests.py index f33872e..4a59f8a 100644 --- a/runtests.py +++ b/runtests.py @@ -37,6 +37,7 @@ ], }, }], + 'SECRET_KEY': 'arandomstring', 'LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT': 1, } settings.configure(**test_settings) From 7dc77d17f417c07c103cda6d4077254a09fd5c61 Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Wed, 11 Nov 2020 11:02:14 +0100 Subject: [PATCH 050/188] Removed Python 2 import shims --- linkcheck/__init__.py | 9 +-------- linkcheck/listeners.py | 9 ++------- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/linkcheck/__init__.py b/linkcheck/__init__.py index d4eacc0..c77aade 100644 --- a/linkcheck/__init__.py +++ b/linkcheck/__init__.py @@ -1,12 +1,5 @@ import threading - -try: - from django.utils.html_parser import HTMLParser # Gone in Django 2.1 -except ImportError: - try: - from HTMLParser import HTMLParser # Python 2 - except ImportError: - from html.parser import HTMLParser # Python 3 +from html.parser import HTMLParser # A global lock, showing whether linkcheck is busy update_lock = threading.Lock() diff --git a/linkcheck/listeners.py b/linkcheck/listeners.py index f12de5d..d025834 100644 --- a/linkcheck/listeners.py +++ b/linkcheck/listeners.py @@ -1,14 +1,9 @@ import os.path import sys import time +from queue import LifoQueue from threading import Thread -try: - import Queue -except ImportError: - # Python 3 - import queue as Queue - from django.apps import apps from django.conf import settings from django.contrib import messages @@ -27,7 +22,7 @@ from linkcheck.models 
import Url, Link -tasks_queue = Queue.LifoQueue() +tasks_queue = LifoQueue() worker_running = False tests_running = len(sys.argv) > 1 and sys.argv[1] == 'test' or sys.argv[0].endswith('runtests.py') From 4fb2ac48f7b9a46e3395d036042564e501573884 Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Wed, 11 Nov 2020 10:49:19 +0100 Subject: [PATCH 051/188] Separate filebrowser code in its own module --- linkcheck/filebrowser.py | 90 ++++++++++++++++++++++++++++++++++++++++ linkcheck/listeners.py | 88 +-------------------------------------- 2 files changed, 92 insertions(+), 86 deletions(-) create mode 100644 linkcheck/filebrowser.py diff --git a/linkcheck/filebrowser.py b/linkcheck/filebrowser.py new file mode 100644 index 0000000..f31bf93 --- /dev/null +++ b/linkcheck/filebrowser.py @@ -0,0 +1,90 @@ +"""Integrate with django-filebrowser if present.""" +import os.path + +from django.conf import settings +from django.contrib import messages + +try: + from filebrowser.views import filebrowser_post_upload + from filebrowser.views import filebrowser_post_rename + from filebrowser.views import filebrowser_post_delete + from filebrowser.settings import DIRECTORY + FILEBROWSER_PRESENT = True +except ImportError: + FILEBROWSER_PRESENT = False + +from linkcheck.models import Url + + +def get_relative_media_url(): + if settings.MEDIA_URL.startswith('http'): + relative_media_url = ('/'+'/'.join(settings.MEDIA_URL.split('/')[3:]))[:-1] + else: + relative_media_url = settings.MEDIA_URL + return relative_media_url + + +def handle_upload(sender, path=None, **kwargs): + url = os.path.join(get_relative_media_url(), kwargs['file'].url_relative) + url_qs = Url.objects.filter(url=url).filter(status=False) + count = url_qs.count() + if count: + url_qs.update(status=True, message='Working document link') + msg = "Please note. Uploading %s has corrected %s broken link%s. 
See the Link Manager for more details" % (url, count, count > 1 and 's' or '') + messages.info(sender, msg) + + +def handle_rename(sender, path=None, **kwargs): + + def isdir(filename): + if filename.count('.'): + return False + else: + return True + + old_url = os.path.join(get_relative_media_url(), DIRECTORY, path, kwargs['filename']) + new_url = os.path.join(get_relative_media_url(), DIRECTORY, path, kwargs['new_filename']) + # Renaming a file will cause it's urls to become invalid + # Renaming a directory will cause the urls of all it's contents to become invalid + old_url_qs = Url.objects.filter(url=old_url).filter(status=True) + if isdir(kwargs['filename']): + old_url_qs = Url.objects.filter(url__startswith=old_url).filter(status=True) + old_count = old_url_qs.count() + if old_count: + old_url_qs.update(status=False, message='Missing Document') + msg = "Warning. Renaming %s has caused %s link%s to break. Please use the Link Manager to fix them" % (old_url, old_count, old_count > 1 and 's' or '') + messages.info(sender, msg) + + # The new directory may fix some invalid links, so we also check for that + if isdir(kwargs['new_filename']): + new_count = 0 + new_url_qs = Url.objects.filter(url__startswith=new_url).filter(status=False) + for url in new_url_qs: + if url.check_url(): + new_count += 1 + else: + new_url_qs = Url.objects.filter(url=new_url).filter(status=False) + new_count = new_url_qs.count() + if new_count: + new_url_qs.update(status=True, message='Working document link') + if new_count: + msg = "Please note. Renaming %s has corrected %s broken link%s. 
See the Link Manager for more details" % (new_url, new_count, new_count > 1 and 's' or '') + messages.info(sender, msg) + + +def handle_delete(sender, path=None, **kwargs): + + url = os.path.join(get_relative_media_url(), DIRECTORY, path, kwargs['filename']) + url_qs = Url.objects.filter(url=url).filter(status=True) + count = url_qs.count() + if count: + url_qs.update(status=False, message='Missing Document') + msg = "Warning. Deleting %s has caused %s link%s to break. Please use the Link Manager to fix them" % (url, count, count > 1 and 's' or '') + messages.info(sender, msg) + + +def register_listeners(): + if FILEBROWSER_PRESENT: + filebrowser_post_upload.connect(handle_upload) + filebrowser_post_rename.connect(handle_rename) + filebrowser_post_delete.connect(handle_delete) diff --git a/linkcheck/listeners.py b/linkcheck/listeners.py index d025834..381aa8d 100644 --- a/linkcheck/listeners.py +++ b/linkcheck/listeners.py @@ -1,23 +1,12 @@ -import os.path import sys import time from queue import LifoQueue from threading import Thread from django.apps import apps -from django.conf import settings -from django.contrib import messages from django.db.models import signals as model_signals -try: - from filebrowser.views import filebrowser_post_upload - from filebrowser.views import filebrowser_post_rename - from filebrowser.views import filebrowser_post_delete - from filebrowser.settings import DIRECTORY - FILEBROWSER_PRESENT = True -except ImportError: - FILEBROWSER_PRESENT = False - +from . import filebrowser from . 
import update_lock from linkcheck.models import Url, Link @@ -192,77 +181,4 @@ def instance_pre_delete(sender, instance, **kwargs): model_signals.post_save.connect(instance_post_save, sender=linklist_cls.model) model_signals.pre_delete.connect(instance_pre_delete, sender=linklist_cls.model) - -# Integrate with django-filebrowser if present - -def get_relative_media_url(): - if settings.MEDIA_URL.startswith('http'): - relative_media_url = ('/'+'/'.join(settings.MEDIA_URL.split('/')[3:]))[:-1] - else: - relative_media_url = settings.MEDIA_URL - return relative_media_url - - -def handle_upload(sender, path=None, **kwargs): - url = os.path.join(get_relative_media_url(), kwargs['file'].url_relative) - url_qs = Url.objects.filter(url=url).filter(status=False) - count = url_qs.count() - if count: - url_qs.update(status=True, message='Working document link') - msg = "Please note. Uploading %s has corrected %s broken link%s. See the Link Manager for more details" % (url, count, count>1 and 's' or '') - messages.info(sender, msg) - - -def handle_rename(sender, path=None, **kwargs): - - def isdir(filename): - if filename.count('.'): - return False - else: - return True - - old_url = os.path.join(get_relative_media_url(), DIRECTORY, path, kwargs['filename']) - new_url = os.path.join(get_relative_media_url(), DIRECTORY, path, kwargs['new_filename']) - # Renaming a file will cause it's urls to become invalid - # Renaming a directory will cause the urls of all it's contents to become invalid - old_url_qs = Url.objects.filter(url=old_url).filter(status=True) - if isdir(kwargs['filename']): - old_url_qs = Url.objects.filter(url__startswith=old_url).filter(status=True) - old_count = old_url_qs.count() - if old_count: - old_url_qs.update(status=False, message='Missing Document') - msg = "Warning. Renaming %s has caused %s link%s to break. 
Please use the Link Manager to fix them" % (old_url, old_count, old_count>1 and 's' or '') - messages.info(sender, msg) - - # The new directory may fix some invalid links, so we also check for that - if isdir(kwargs['new_filename']): - new_count = 0 - new_url_qs = Url.objects.filter(url__startswith=new_url).filter(status=False) - for url in new_url_qs: - if url.check_url(): - new_count += 1 - else: - new_url_qs = Url.objects.filter(url=new_url).filter(status=False) - new_count = new_url_qs.count() - if new_count: - new_url_qs.update(status=True, message='Working document link') - if new_count: - msg = "Please note. Renaming %s has corrected %s broken link%s. See the Link Manager for more details" % (new_url, new_count, new_count>1 and 's' or '') - messages.info(sender, msg) - - -def handle_delete(sender, path=None, **kwargs): - - url = os.path.join(get_relative_media_url(), DIRECTORY, path, kwargs['filename']) - url_qs = Url.objects.filter(url=url).filter(status=True) - count = url_qs.count() - if count: - url_qs.update(status=False, message='Missing Document') - msg = "Warning. Deleting %s has caused %s link%s to break. 
Please use the Link Manager to fix them" % (url, count, count>1 and 's' or '') - messages.info(sender, msg) - - -if FILEBROWSER_PRESENT: - filebrowser_post_upload.connect(handle_upload) - filebrowser_post_rename.connect(handle_rename) - filebrowser_post_delete.connect(handle_delete) +filebrowser.register_listeners() From 42121dd1ff18836fe0b671f36309fbe638d3b53c Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Wed, 11 Nov 2020 17:52:03 +0100 Subject: [PATCH 052/188] Removed test branches for outdated Django versions --- linkcheck/tests/test_linkcheck.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/linkcheck/tests/test_linkcheck.py b/linkcheck/tests/test_linkcheck.py index 3f68fe5..bfce738 100644 --- a/linkcheck/tests/test_linkcheck.py +++ b/linkcheck/tests/test_linkcheck.py @@ -93,8 +93,7 @@ def test_internal_check_view_redirect(self): uv = Url(url="/http/brokenredirect/", still_exists=True) uv.check_url() self.assertEqual(uv.status, False) - redirect_code = 301 if django.VERSION < (1, 9) else 302 - self.assertEqual(uv.message, 'This link redirects: code %d (Broken redirect)' % redirect_code) + self.assertEqual(uv.message, 'This link redirects: code 302 (Broken redirect)') def test_internal_check_found(self): uv = Url(url="/public/", still_exists=True) @@ -380,9 +379,6 @@ def test_coverage_view(self): class GetJqueryMinJsTestCase(TestCase): def test(self): - if django.VERSION < (1, 10): - self.assertEqual('admin/js/jquery.min.js', get_jquery_min_js()) - else: - self.assertEqual( - 'admin/js/vendor/jquery/jquery.min.js', get_jquery_min_js() - ) + self.assertEqual( + 'admin/js/vendor/jquery/jquery.min.js', get_jquery_min_js() + ) From dd7d9a9c8bd4740b1189d5ccfc05ea70a0d34525 Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Wed, 11 Nov 2020 17:39:05 +0100 Subject: [PATCH 053/188] Replaced implicit by explicit listener registration --- linkcheck/apps.py | 4 ++-- linkcheck/listeners.py | 28 ++++++++++++++-------------- 2 files changed, 
16 insertions(+), 16 deletions(-) diff --git a/linkcheck/apps.py b/linkcheck/apps.py index 87c8027..d271b95 100644 --- a/linkcheck/apps.py +++ b/linkcheck/apps.py @@ -47,11 +47,11 @@ def build_linklists(self): class LinkcheckConfig(BaseLinkcheckConfig): def ready(self): from .linkcheck_settings import DISABLE_LISTENERS + from .listeners import register_listeners super().ready() if not DISABLE_LISTENERS: - # This import will register listeners - from . import listeners + register_listeners() from .models import Link, link_post_delete post_delete.connect(link_post_delete, sender=Link) diff --git a/linkcheck/listeners.py b/linkcheck/listeners.py index 381aa8d..2b9cca4 100644 --- a/linkcheck/listeners.py +++ b/linkcheck/listeners.py @@ -168,17 +168,17 @@ def instance_pre_delete(sender, instance, **kwargs): old_urls.update(status=False, message='Broken internal link') -# 1. register listeners for the objects that contain Links -for linklist_name, linklist_cls in apps.get_app_config('linkcheck').all_linklists.items(): - model_signals.post_save.connect(check_instance_links, sender=linklist_cls.model) - model_signals.post_delete.connect(delete_instance_links, sender=linklist_cls.model) - - # 2. register listeners for the objects that are targets of Links, - # only when get_absolute_url() is defined for the model - - if getattr(linklist_cls.model, 'get_absolute_url', None): - model_signals.pre_save.connect(instance_pre_save, sender=linklist_cls.model) - model_signals.post_save.connect(instance_post_save, sender=linklist_cls.model) - model_signals.pre_delete.connect(instance_pre_delete, sender=linklist_cls.model) - -filebrowser.register_listeners() +def register_listeners(): + # 1. 
register listeners for the objects that contain Links + for linklist_name, linklist_cls in apps.get_app_config('linkcheck').all_linklists.items(): + model_signals.post_save.connect(check_instance_links, sender=linklist_cls.model) + model_signals.post_delete.connect(delete_instance_links, sender=linklist_cls.model) + + # 2. register listeners for the objects that are targets of Links, + # only when get_absolute_url() is defined for the model + if getattr(linklist_cls.model, 'get_absolute_url', None): + model_signals.pre_save.connect(instance_pre_save, sender=linklist_cls.model) + model_signals.post_save.connect(instance_post_save, sender=linklist_cls.model) + model_signals.pre_delete.connect(instance_pre_delete, sender=linklist_cls.model) + + filebrowser.register_listeners() From 214bd026f34ee83bed89356e75c37b602e3d29df Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Thu, 25 Feb 2021 20:56:01 +0100 Subject: [PATCH 054/188] Dropped support for Python 3.4 --- .travis.yml | 9 --------- setup.py | 2 +- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/.travis.yml b/.travis.yml index 88400c5..64d9117 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,5 @@ language: python python: - - 3.4 - 3.5 - 3.6 - 3.7 @@ -15,16 +14,8 @@ env: matrix: fast_finish: true exclude: - - python: 3.4 - env: DJANGO="Django==2.1.*" - - python: 3.4 - env: DJANGO="Django==2.2.*" - - python: 3.4 - env: DJANGO="Django==3.0.*" - python: 3.5 env: DJANGO="Django==3.0.*" - - python: 3.4 - env: DJANGO="Django==3.1.*" - python: 3.5 env: DJANGO="Django==3.1.*" install: diff --git a/setup.py b/setup.py index 5250430..b61280e 100644 --- a/setup.py +++ b/setup.py @@ -39,10 +39,10 @@ def read(fname): 'Programming Language :: Python', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3 :: Only', - 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', + 
'Programming Language :: Python :: 3.8', 'Framework :: Django', ], ) From 4535f16abc2331cb7a86f7960920c0e45520ef1d Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Thu, 25 Feb 2021 20:31:10 +0100 Subject: [PATCH 055/188] Ensure ALLOWED_HOSTS doesn't prevent checking internal links --- linkcheck/models.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/linkcheck/models.py b/linkcheck/models.py index 5b0c758..3b1f63a 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -13,6 +13,7 @@ from django.contrib.contenttypes.models import ContentType from django.db import models from django.test.client import Client +from django.test.utils import modify_settings from django.utils.encoding import iri_to_uri from django.utils.timezone import now @@ -214,7 +215,8 @@ def _check_internal(self, tested_url): settings.PREPEND_WWW = False c = Client() c.handler = LinkCheckHandler() - response = c.get(tested_url) + with modify_settings(ALLOWED_HOSTS={'append': 'testserver'}): + response = c.get(tested_url) if response.status_code == 200: self.message = 'Working internal link' self.status = True @@ -236,7 +238,8 @@ def _check_internal(self, tested_url): self.message = 'Failed to parse HTML for anchor' elif response.status_code == 302 or response.status_code == 301: - redir_response = c.get(tested_url, follow=True) + with modify_settings(ALLOWED_HOSTS={'append': 'testserver'}): + redir_response = c.get(tested_url, follow=True) if redir_response.status_code == 200: redir_state = 'Working redirect' self.status = True From 339d6b3a5a8522ed860db40cfad5de550c456b65 Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Wed, 11 Nov 2020 18:21:44 +0100 Subject: [PATCH 056/188] Added unregister/enable/disable listeners utilities --- CHANGELOG | 7 +++++++ README.rst | 20 ++++++++++++++++++ linkcheck/filebrowser.py | 7 +++++++ linkcheck/listeners.py | 35 +++++++++++++++++++++++++++++++ linkcheck/tests/test_linkcheck.py | 34 ++++++++++++++++++++++++++++++ 5 files 
changed, 103 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index f193887..679be08 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,10 @@ +Unreleased + +* Added explicit `listeners.register_listeners` and + `listeners.unregister_listeners` functions. +* Added `listeners.enable_listeners` and `listeners.disable_listeners` context + managers. + 1.7 (2020-01-13) * Dropped support for Python 2 and Django < 1.11. diff --git a/README.rst b/README.rst index 7c02e00..e096db0 100644 --- a/README.rst +++ b/README.rst @@ -109,6 +109,12 @@ to the ``--limit`` (``--l``) command option. Settings -------- +LINKCHECK_DISABLE_LISTENERS +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A setting to totally disable linkcheck, typically when running tests. See also +the context managers below. + LINKCHECK_EXTERNAL_RECHECK_INTERVAL ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -192,3 +198,17 @@ Tests can be run standalone by using the runtests.py script in linkcheck root: $ python runtests.py If you want to run linkcheck tests in the context of your project, you should include 'linkcheck.tests.sampleapp' in your INSTALLED_APPS setting. + +Linkcheck gives you two context managers to enable or disable listeners in your +own tests. For example: + + def test_something_without_listeners(self): + with listeners.disable_listeners(): + # Create/update here without linkcheck intervening. + +In the case you defined the LINKCHECK_DISABLE_LISTENERS setting, you can +temporarily enable it by: + + def test_something_with_listeners(self): + with listeners.enable_listeners(): + # Create/update here and see linkcheck activated. 
diff --git a/linkcheck/filebrowser.py b/linkcheck/filebrowser.py index f31bf93..1c5303a 100644 --- a/linkcheck/filebrowser.py +++ b/linkcheck/filebrowser.py @@ -88,3 +88,10 @@ def register_listeners(): filebrowser_post_upload.connect(handle_upload) filebrowser_post_rename.connect(handle_rename) filebrowser_post_delete.connect(handle_delete) + + +def unregister_listeners(): + if FILEBROWSER_PRESENT: + filebrowser_post_upload.disconnect(handle_upload) + filebrowser_post_rename.disconnect(handle_rename) + filebrowser_post_delete.disconnect(handle_delete) diff --git a/linkcheck/listeners.py b/linkcheck/listeners.py index 2b9cca4..1e699a5 100644 --- a/linkcheck/listeners.py +++ b/linkcheck/listeners.py @@ -1,5 +1,6 @@ import sys import time +from contextlib import contextmanager from queue import LifoQueue from threading import Thread @@ -182,3 +183,37 @@ def register_listeners(): model_signals.pre_delete.connect(instance_pre_delete, sender=linklist_cls.model) filebrowser.register_listeners() + + +def unregister_listeners(): + # 1. register listeners for the objects that contain Links + for linklist_name, linklist_cls in apps.get_app_config('linkcheck').all_linklists.items(): + model_signals.post_save.disconnect(check_instance_links, sender=linklist_cls.model) + model_signals.post_delete.disconnect(delete_instance_links, sender=linklist_cls.model) + + # 2. 
register listeners for the objects that are targets of Links, + # only when get_absolute_url() is defined for the model + if getattr(linklist_cls.model, 'get_absolute_url', None): + model_signals.pre_save.disconnect(instance_pre_save, sender=linklist_cls.model) + model_signals.post_save.disconnect(instance_post_save, sender=linklist_cls.model) + model_signals.pre_delete.disconnect(instance_pre_delete, sender=linklist_cls.model) + + filebrowser.unregister_listeners() + + +@contextmanager +def enable_listeners(*args, **kwargs): + register_listeners() + try: + yield + finally: + unregister_listeners() + + +@contextmanager +def disable_listeners(*args, **kwargs): + unregister_listeners() + try: + yield + finally: + register_listeners() diff --git a/linkcheck/tests/test_linkcheck.py b/linkcheck/tests/test_linkcheck.py index bfce738..a29f95f 100644 --- a/linkcheck/tests/test_linkcheck.py +++ b/linkcheck/tests/test_linkcheck.py @@ -15,6 +15,9 @@ from django.test.utils import override_settings from django.urls import reverse +from linkcheck.listeners import ( + enable_listeners, disable_listeners, register_listeners, unregister_listeners, +) from linkcheck.models import Link, Url from linkcheck.views import get_jquery_min_js @@ -347,6 +350,37 @@ def test_update_object(self): self.assertEqual(Url.objects.all()[0].url, good_url) +class RegisteringTests(TestCase): + good_url = "/public/" + + def test_unregister(self): + self.assertEqual(Link.objects.count(), 0) + unregister_listeners() + Author.objects.create(name="John Smith", website=self.good_url) + self.assertEqual(Link.objects.count(), 0) + register_listeners() + Author.objects.create(name="Jill Smith", website=self.good_url) + self.assertEqual(Link.objects.count(), 1) + + def test_disable_listeners(self): + self.assertEqual(Link.objects.count(), 0) + with disable_listeners(): + Author.objects.create(name="John Smith", website=self.good_url) + self.assertEqual(Link.objects.count(), 0) + Author.objects.create(name="Jill 
Smith", website=self.good_url) + self.assertEqual(Link.objects.count(), 1) + + def test_enable_listeners(self): + self.assertEqual(Link.objects.count(), 0) + unregister_listeners() + with enable_listeners(): + Author.objects.create(name="John Smith", website=self.good_url) + self.assertEqual(Link.objects.count(), 1) + Author.objects.create(name="Jill Smith", website=self.good_url) + self.assertEqual(Link.objects.count(), 1) + register_listeners() + + class ViewTestCase(TestCase): def setUp(self): User.objects.create_superuser('admin', 'admin@example.org', 'password') From 969676f457db7e71244c08241b5b7e7a2f2d7956 Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Thu, 25 Feb 2021 21:51:46 +0100 Subject: [PATCH 057/188] Fill Changelog and bump to 1.8 --- CHANGELOG | 7 ++++++- setup.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 679be08..532a781 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,9 +1,14 @@ -Unreleased +1.8 (2021-02-25) * Added explicit `listeners.register_listeners` and `listeners.unregister_listeners` functions. * Added `listeners.enable_listeners` and `listeners.disable_listeners` context managers. +* Avoid crash when looking for anchors in response content. +* Avoid possible failures when checking internal links depending on + ALLOWED_HOSTS setting. +* Confirmed compatibility with Django 3.1. +* Dropped support for Python 3.4. 
1.7 (2020-01-13) diff --git a/setup.py b/setup.py index b61280e..703a458 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ def read(fname): setup( name='django-linkcheck', - version='1.7', + version='1.8', description="A Django app that will analyze and report on links in any " "model that you register with it.", long_description=read('README.rst'), From d086a1ef2d8db56c6fe3f4e49f80238b9bcce782 Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Thu, 1 Apr 2021 08:32:46 +0200 Subject: [PATCH 058/188] Prepare the 1.8.1 correcting release --- CHANGELOG | 6 ++++++ setup.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index 532a781..9da8535 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,9 @@ +1.8.1 (2021-04-01) + +* The 1.8 release contained unwanted temporary stuff and was + therefore a broken release. Many thanks to Stefan Borer for + noticing that. + 1.8 (2021-02-25) * Added explicit `listeners.register_listeners` and diff --git a/setup.py b/setup.py index 703a458..9cd2194 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ def read(fname): setup( name='django-linkcheck', - version='1.8', + version='1.8.1', description="A Django app that will analyze and report on links in any " "model that you register with it.", long_description=read('README.rst'), From df6ecc2eca891aba9018b9b9e9dc610a9dc49330 Mon Sep 17 00:00:00 2001 From: Timo Ludwig Date: Fri, 14 May 2021 16:25:45 +0200 Subject: [PATCH 059/188] Fixes #106 - Ignore raw `pre_save` signal If the model is saved exactly as presented (i.e. when loading a fixture), one should not query/modify other records in the database as the database might not be in a consistent state yet. 
--- linkcheck/listeners.py | 6 +++--- linkcheck/tests/sampleapp/fixture.json | 10 ++++++++++ linkcheck/tests/test_linkcheck.py | 7 +++++++ 3 files changed, 20 insertions(+), 3 deletions(-) create mode 100644 linkcheck/tests/sampleapp/fixture.json diff --git a/linkcheck/listeners.py b/linkcheck/listeners.py index 1e699a5..b5084fb 100644 --- a/linkcheck/listeners.py +++ b/linkcheck/listeners.py @@ -111,9 +111,9 @@ def delete_instance_links(sender, instance, **kwargs): old_links.delete() -def instance_pre_save(sender, instance, **kwargs): - if not instance.pk: - # Ignore unsaved instances +def instance_pre_save(sender, instance, raw=False, **kwargs): + if not instance.pk or raw: + # Ignore unsaved instances or raw imports return current_url = instance.get_absolute_url() previous_url = sender.objects.get(pk=instance.pk).get_absolute_url() diff --git a/linkcheck/tests/sampleapp/fixture.json b/linkcheck/tests/sampleapp/fixture.json new file mode 100644 index 0000000..4ba1dde --- /dev/null +++ b/linkcheck/tests/sampleapp/fixture.json @@ -0,0 +1,10 @@ +[ + { + "model": "sampleapp.Book", + "pk": 1, + "fields": { + "title": "My Title", + "description": "My description" + } + } +] diff --git a/linkcheck/tests/test_linkcheck.py b/linkcheck/tests/test_linkcheck.py index a29f95f..68d6f53 100644 --- a/linkcheck/tests/test_linkcheck.py +++ b/linkcheck/tests/test_linkcheck.py @@ -416,3 +416,10 @@ def test(self): self.assertEqual( 'admin/js/vendor/jquery/jquery.min.js', get_jquery_min_js() ) + + +class FixtureTestCase(TestCase): + fixtures = ['linkcheck/tests/sampleapp/fixture.json'] + + def test_fixture(self): + self.assertEqual(Book.objects.count(), 1) From 21c1d2f2ae9a6612e72440baf8cbc6d160e1406a Mon Sep 17 00:00:00 2001 From: Stefan Borer Date: Thu, 10 Jun 2021 11:26:47 +0200 Subject: [PATCH 060/188] chore(ci): support python 3.9, django 3.2 --- .travis.yml | 4 ++++ linkcheck/tests/test_linkcheck.py | 4 +++- setup.py | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) diff 
--git a/.travis.yml b/.travis.yml index 64d9117..2f74ae7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,6 +4,7 @@ python: - 3.6 - 3.7 - 3.8 + - 3.9 env: - DJANGO="Django==1.11.*" - DJANGO="Django==2.0.*" @@ -11,6 +12,7 @@ env: - DJANGO="Django==2.2.*" - DJANGO="Django==3.0.*" - DJANGO="Django==3.1.*" + - DJANGO="Django==3.2.*" matrix: fast_finish: true exclude: @@ -18,6 +20,8 @@ matrix: env: DJANGO="Django==3.0.*" - python: 3.5 env: DJANGO="Django==3.1.*" + - python: 3.5 + env: DJANGO="Django==3.2.*" install: - travis_retry pip install $DJANGO - travis_retry pip install requests diff --git a/linkcheck/tests/test_linkcheck.py b/linkcheck/tests/test_linkcheck.py index 68d6f53..14784a3 100644 --- a/linkcheck/tests/test_linkcheck.py +++ b/linkcheck/tests/test_linkcheck.py @@ -92,7 +92,9 @@ def test_internal_check_view_redirect(self): uv = Url(url="/admin/linkcheck", still_exists=True) uv.check_url() self.assertEqual(uv.status, True) - self.assertEqual(uv.message, 'This link redirects: code 301 (Working redirect)') + self.assertIn(uv.message, + ['This link redirects: code %s (Working redirect)' % status for status in [301, 302]] + ) uv = Url(url="/http/brokenredirect/", still_exists=True) uv.check_url() self.assertEqual(uv.status, False) diff --git a/setup.py b/setup.py index 9cd2194..976808f 100644 --- a/setup.py +++ b/setup.py @@ -43,6 +43,7 @@ def read(fname): 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', 'Framework :: Django', ], ) From 9d9b70ef7f558283610b15884f1024ead023c902 Mon Sep 17 00:00:00 2001 From: Stefan Borer Date: Thu, 10 Jun 2021 11:53:29 +0200 Subject: [PATCH 061/188] chore: remove usage of deprecated imp module --- linkcheck/apps.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/linkcheck/apps.py b/linkcheck/apps.py index d271b95..04efa69 100644 --- a/linkcheck/apps.py +++ b/linkcheck/apps.py @@ -1,5 
+1,4 @@ -import imp -from importlib import import_module +import importlib from django.apps import AppConfig, apps from django.db.models.signals import post_delete @@ -21,11 +20,10 @@ def ready(self): def build_linklists(self): """Autodiscovery of linkLists""" for app in apps.get_app_configs(): - try: - imp.find_module('linklists', [app.path]) - except ImportError: + module_name = "%s.linklists" % app.name + if not importlib.util.find_spec(module_name): continue - the_module = import_module("%s.linklists" % app.name) + the_module = importlib.import_module(module_name) try: for k in the_module.linklists.keys(): if k in self.all_linklists.keys(): From b461743ccceaddeab6753ae42f89915dfc0f1cbe Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Sat, 18 Dec 2021 19:03:05 +0100 Subject: [PATCH 062/188] Fixes #110 - Replaced Travis CI by GitHub workflow --- .github/workflows/test.yml | 33 +++++++++++++++++++++++++++++++++ .travis.yml | 31 ------------------------------- README.rst | 5 +++-- 3 files changed, 36 insertions(+), 33 deletions(-) create mode 100644 .github/workflows/test.yml delete mode 100644 .travis.yml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..28d4297 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,33 @@ +name: Test + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + build: + runs-on: ubuntu-latest + strategy: + max-parallel: 5 + matrix: + python-version: ['3.6', '3.7', '3.8', '3.9'] + django-version: ['2.2.0', '3.0.0', '3.1.0', '3.2.0'] + + steps: + - uses: actions/checkout@v2 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install --upgrade django~=${{ matrix.django-version }} + python -m pip install --upgrade requests + + - name: Run tests + run: python 
runtests.py diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 2f74ae7..0000000 --- a/.travis.yml +++ /dev/null @@ -1,31 +0,0 @@ -language: python -python: - - 3.5 - - 3.6 - - 3.7 - - 3.8 - - 3.9 -env: - - DJANGO="Django==1.11.*" - - DJANGO="Django==2.0.*" - - DJANGO="Django==2.1.*" - - DJANGO="Django==2.2.*" - - DJANGO="Django==3.0.*" - - DJANGO="Django==3.1.*" - - DJANGO="Django==3.2.*" -matrix: - fast_finish: true - exclude: - - python: 3.5 - env: DJANGO="Django==3.0.*" - - python: 3.5 - env: DJANGO="Django==3.1.*" - - python: 3.5 - env: DJANGO="Django==3.2.*" -install: - - travis_retry pip install $DJANGO - - travis_retry pip install requests - - pip install -e . -script: python runtests.py -notifications: - email: false diff --git a/README.rst b/README.rst index e096db0..0ea4c43 100644 --- a/README.rst +++ b/README.rst @@ -2,8 +2,9 @@ django-linkcheck =================== -.. image:: https://travis-ci.org/DjangoAdminHackers/django-linkcheck.svg?branch=master - :target: https://travis-ci.org/DjangoAdminHackers/django-linkcheck +.. image:: https://github.com/DjangoAdminHackers/django-linkcheck/workflows/Test/badge.svg + :target: https://github.com/DjangoAdminHackers/django-linkcheck/actions + :alt: GitHub Actions A fairly flexible app that will analyze and report on links in any model that you register with it. From 3229ef51380ab6f2e78ac8654a25f045fc3f9fbf Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Sat, 18 Dec 2021 18:21:21 +0100 Subject: [PATCH 063/188] Removed mention of outdated django-admin-blocks --- README.rst | 6 ------ 1 file changed, 6 deletions(-) diff --git a/README.rst b/README.rst index 0ea4c43..895bae6 100644 --- a/README.rst +++ b/README.rst @@ -48,12 +48,6 @@ Basic usage #. View ``/admin/linkcheck/`` from your browser. -The file ``notifications.py`` is completely optional. It works with -django-admin-blocks_ to display a notification about broken links as -shown in the screenshot above. - -.. 
_django-admin-blocks: https://github.com/DjangoAdminHackers/django-admin-blocks - We are aware that this documentation is on the brief side of things so any suggestions for elaboration or clarification would be gratefully accepted. From a7f0a573d06ed245b072c968af5ef79790bed2ac Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Sat, 18 Dec 2021 18:32:43 +0100 Subject: [PATCH 064/188] Removed support for Django<2.2 --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 895bae6..507cd71 100644 --- a/README.rst +++ b/README.rst @@ -23,7 +23,7 @@ automatically when objects are saved. This is handled by signals. Minimal requirements -------------------- -django-linkchecks requires Python 3 and Django 1.11. +django-linkchecks requires Python 3 and Django 2.2. Basic usage ----------- From d93e8486d3d4b1cc7a9cad6800590397ff31a8a2 Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Sat, 18 Dec 2021 18:33:21 +0100 Subject: [PATCH 065/188] Replaced obsolete url() calls by path() --- README.rst | 2 +- linkcheck/tests/urls.py | 24 ++++++++++++------------ linkcheck/urls.py | 6 +++--- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/README.rst b/README.rst index 507cd71..b7544bd 100644 --- a/README.rst +++ b/README.rst @@ -44,7 +44,7 @@ Basic usage #. Add to your root url config:: - url(http://github.com/r'%5Eadmin/linkcheck/',%20include('linkcheck.urls')) + path('admin/linkcheck/', include('linkcheck.urls')) #. View ``/admin/linkcheck/`` from your browser. 
diff --git a/linkcheck/tests/urls.py b/linkcheck/tests/urls.py index 64e0cc1..ab4b2e9 100644 --- a/linkcheck/tests/urls.py +++ b/linkcheck/tests/urls.py @@ -1,6 +1,6 @@ -from django.conf.urls import include, url -from django.contrib import admin from django import http +from django.contrib import admin +from django.urls import include, path from django.views.generic import RedirectView from linkcheck.tests.sampleapp import views @@ -8,14 +8,14 @@ handler404 = lambda *args, **kwargs: http.HttpResponseNotFound('') urlpatterns = [ - url(http://github.com/r'%5Eadmin/linkcheck/',%20include('linkcheck.urls')), - url(http://github.com/r'%5Eadmin/',%20admin.site.urls), - url(http://github.com/r'%5Epublic/',%20views.http_response,%20%7B'code':%20'200'%7D), - url(http://github.com/r'%5Ehttp/(?P%3Ccode%3E\d+)/$', views.http_response), - url(http://github.com/r'%5Ehttp/(?P%3Ccode%3E\d+)/rückmeldung/$', views.http_response), - url(http://github.com/r'%5Ehttp/getonly/(?P%3Ccode%3E\d+)/$', views.http_response_get_only), - url(http://github.com/r'%5Ehttp/redirect/(?P%3Ccode%3E\d+)/$', views.http_redirect), - url(http://github.com/r'%5Ehttp/redirect_to_404/),%20views.http_redirect_to_404), - url(http://github.com/r'%5Ehttp/brokenredirect/'),%20RedirectView.as_view(url='/non-existent/')), - url(http://github.com/r'%5Etimeout/),%20views.timeout), + path('admin/linkcheck/', include('linkcheck.urls')), + path('admin/', admin.site.urls), + path('public/', views.http_response, {'code': '200'}), + path('http//', views.http_response), + path('http//rückmeldung/', views.http_response), + path('http/getonly//', views.http_response_get_only), + path('http/redirect//', views.http_redirect), + path('http/redirect_to_404/', views.http_redirect_to_404), + path('http/brokenredirect/', RedirectView.as_view(url='/non-existent/')), + path('timeout/', views.timeout), ] diff --git a/linkcheck/urls.py 
b/linkcheck/urls.py index f0cf436..3106cc2 100644 --- a/linkcheck/urls.py +++ b/linkcheck/urls.py @@ -1,8 +1,8 @@ -from django.conf.urls import url +from django.urls import path from . import views urlpatterns = [ - url(http://github.com/r'%5Ecoverage/'),%20views.coverage,%20name='linkcheck_coverage'), - url(http://github.com/r'%5E.*'),%20views.report,%20name='linkcheck_report'), + path('coverage/', views.coverage, name='linkcheck_coverage'), + path('', views.report, name='linkcheck_report'), ] From 95c08451a41e6f6a461bbe2ed7540ce4661395f1 Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Mon, 2 Nov 2020 09:48:23 +0100 Subject: [PATCH 066/188] Replaced NullBooleanField by BooleanField(null=True) NullBooleanField was deprecated in Django 3.1. --- linkcheck/migrations/0001_initial.py | 2 +- linkcheck/models.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/linkcheck/migrations/0001_initial.py b/linkcheck/migrations/0001_initial.py index fc1e6fe..4a5e40e 100644 --- a/linkcheck/migrations/0001_initial.py +++ b/linkcheck/migrations/0001_initial.py @@ -28,7 +28,7 @@ class Migration(migrations.Migration): ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), ('url', models.CharField(unique=True, max_length=255)), ('last_checked', models.DateTimeField(null=True, blank=True)), - ('status', models.NullBooleanField()), + ('status', models.BooleanField(null=True)), ('message', models.CharField(max_length=1024, null=True, blank=True)), ('still_exists', models.BooleanField(default=False)), ], diff --git a/linkcheck/models.py b/linkcheck/models.py index 3b1f63a..4bedb1f 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -62,7 +62,7 @@ class Url(models.Model): """ url = models.CharField(max_length=MAX_URL_LENGTH, unique=True) # See http://www.boutell.com/newfaq/misc/urllength.html last_checked = models.DateTimeField(blank=True, null=True) - status = models.NullBooleanField() + status = 
models.BooleanField(null=True) message = models.CharField(max_length=1024, blank=True, null=True) still_exists = models.BooleanField(default=False) redirect_to = models.TextField(blank=True) From e1c6a916f1b7aae49b40eb9a4a33ad39dcda285d Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Sat, 18 Dec 2021 18:39:51 +0100 Subject: [PATCH 067/188] Replaced obsolete ifequal --- linkcheck/templates/linkcheck/report.html | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/linkcheck/templates/linkcheck/report.html b/linkcheck/templates/linkcheck/report.html index 9ae0277..827f995 100644 --- a/linkcheck/templates/linkcheck/report.html +++ b/linkcheck/templates/linkcheck/report.html @@ -141,10 +141,10 @@
Show:   - {% ifequal filter 'show_valid' %}Good links{% else %}Good links{% endifequal %}   - {% ifequal filter 'show_invalid' %}Broken links{% else %}Broken links{% endifequal %}   - {% ifequal filter 'show_unchecked' %}Untested links{% else %}Untested links{% endifequal %}   - {% ifequal filter 'ignored' %}Ignored links{% else %}Ignored links{% endifequal %} + {% if filter == 'show_valid' %}Good links{% else %}Good links{% endif %}   + {% if filter == 'show_invalid' %}Broken links{% else %}Broken links{% endif %}   + {% if filter == 'show_unchecked' %}Untested links{% else %}Untested links{% endif %}   + {% if filter == 'ignored' %}Ignored links{% else %}Ignored links{% endif %} ({{ ignored_count }})
From 86febe51bc8d87ce7d79597ab208b630a385ec2c Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Sat, 18 Dec 2021 18:44:59 +0100 Subject: [PATCH 068/188] Silenced some Django deprecation warnings --- linkcheck/__init__.py | 5 ++++- linkcheck/apps.py | 2 ++ runtests.py | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/linkcheck/__init__.py b/linkcheck/__init__.py index c77aade..a0cb5a7 100644 --- a/linkcheck/__init__.py +++ b/linkcheck/__init__.py @@ -1,10 +1,13 @@ import threading from html.parser import HTMLParser +import django + # A global lock, showing whether linkcheck is busy update_lock = threading.Lock() -default_app_config = 'linkcheck.apps.LinkcheckConfig' +if django.VERSION <= (3, 2): + default_app_config = 'linkcheck.apps.LinkcheckConfig' class Lister(HTMLParser): diff --git a/linkcheck/apps.py b/linkcheck/apps.py index 04efa69..1d2532b 100644 --- a/linkcheck/apps.py +++ b/linkcheck/apps.py @@ -43,6 +43,8 @@ def build_linklists(self): class LinkcheckConfig(BaseLinkcheckConfig): + default = True + def ready(self): from .linkcheck_settings import DISABLE_LISTENERS from .listeners import register_listeners diff --git a/runtests.py b/runtests.py index 4a59f8a..de1804c 100644 --- a/runtests.py +++ b/runtests.py @@ -34,9 +34,11 @@ 'django.contrib.auth.context_processors.auth', 'django.contrib.messages.context_processors.messages', 'django.template.context_processors.static', + 'django.template.context_processors.request', ], }, }], + 'DEFAULT_AUTO_FIELD': 'django.db.models.AutoField', 'SECRET_KEY': 'arandomstring', 'LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT': 1, } From a9f1bfa26986c70fb4288566dcc15530ddbdc122 Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Sat, 18 Dec 2021 19:19:26 +0100 Subject: [PATCH 069/188] Tested with Django 4.0 --- .github/workflows/test.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 28d4297..28167c9 100644 --- 
a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -13,7 +13,12 @@ jobs: max-parallel: 5 matrix: python-version: ['3.6', '3.7', '3.8', '3.9'] - django-version: ['2.2.0', '3.0.0', '3.1.0', '3.2.0'] + django-version: ['2.2.0', '3.0.0', '3.1.0', '3.2.0', '4.0.0'] + exclude: + - python-version: '3.6' + django-version: '4.0.0' + - python-version: '3.7' + django-version: '4.0.0' steps: - uses: actions/checkout@v2 From 5a9eb40effc7c3da4f932765965cd9a9b89d06b7 Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Thu, 23 Dec 2021 15:24:01 +0100 Subject: [PATCH 070/188] Changelog and bumped version for 1.9 --- CHANGELOG | 5 +++++ setup.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index 9da8535..3468e52 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,8 @@ +1.9 (2021-12-23) + +* Added support for Django 3.2 and 4.0 and removed support for Django < 2.2. +* Ignore raw `pre_save` signal (Timo Ludwig, #106). + 1.8.1 (2021-04-01) * The 1.8 release contained unwanted temporary stuff and was diff --git a/setup.py b/setup.py index 976808f..01c4327 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ def read(fname): setup( name='django-linkcheck', - version='1.8.1', + version='1.9', description="A Django app that will analyze and report on links in any " "model that you register with it.", long_description=read('README.rst'), From 7c4d174e0b278e6e42e9a189324b200229ef33ba Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Thu, 23 Dec 2021 15:43:06 +0100 Subject: [PATCH 071/188] Add PyPI badge in README --- README.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.rst b/README.rst index b7544bd..3d4a855 100644 --- a/README.rst +++ b/README.rst @@ -6,6 +6,10 @@ django-linkcheck :target: https://github.com/DjangoAdminHackers/django-linkcheck/actions :alt: GitHub Actions +.. 
image:: https://img.shields.io/pypi/v/django-linkcheck.svg + :alt: PyPI version + :target: https://pypi.org/project/django-linkcheck/ + A fairly flexible app that will analyze and report on links in any model that you register with it. From 8cb97b165b1d26f498896c8cda61f26d98f7a8ec Mon Sep 17 00:00:00 2001 From: Giuliano Mele Date: Sat, 12 Mar 2022 10:22:50 +0100 Subject: [PATCH 072/188] Add object filtering hook --- README.rst | 11 +++++++++++ linkcheck/__init__.py | 3 +++ linkcheck/tests/sampleapp/linklists.py | 15 ++++++++++++++- linkcheck/tests/sampleapp/models.py | 6 ++++++ linkcheck/tests/test_linkcheck.py | 23 ++++++++++++++++++++++- 5 files changed, 56 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 3d4a855..2028152 100644 --- a/README.rst +++ b/README.rst @@ -83,6 +83,17 @@ customize the extracted links: ``image_fields``: a list of ``ImageField`` field names whose content will be considered as links. Empty ``ImageField`` content is always ignored. + ``filter_callable``: a callable which allows to pass a function as filter + for your linklist class. It allows to apply more advanced filter operations. + This function must be a class method and it should be passed the objects query + set and return the filtered objects. 
+ Example usage in your linklists.py - only check latest versions:: + + @classmethod + def filter_callable(cls, objects): + latest = Model.objects.filter(id=OuterRef('id')).order_by('-version') + return objects.filter(version=Subquery(latest.values('version')[:1])) + Management commands ------------------- diff --git a/linkcheck/__init__.py b/linkcheck/__init__.py index a0cb5a7..d85f4f9 100644 --- a/linkcheck/__init__.py +++ b/linkcheck/__init__.py @@ -121,6 +121,7 @@ class Linklist: object_filter = None object_exclude = None + filter_callable = None def __get(self, name, obj, default=None): try: @@ -191,6 +192,8 @@ def objects(cls): objects = objects.filter(**cls.object_filter).distinct() if cls.object_exclude: objects = objects.exclude(**cls.object_exclude).distinct() + if cls.filter_callable: + objects = cls.filter_callable(objects) return objects def get_linklist(self, extra_filter=None): diff --git a/linkcheck/tests/sampleapp/linklists.py b/linkcheck/tests/sampleapp/linklists.py index 346bb65..9af5d85 100644 --- a/linkcheck/tests/sampleapp/linklists.py +++ b/linkcheck/tests/sampleapp/linklists.py @@ -1,5 +1,6 @@ +from django.db.models import Subquery, OuterRef from linkcheck import Linklist -from linkcheck.tests.sampleapp.models import Author, Book +from linkcheck.tests.sampleapp.models import Author, Book, Journal class BookLinklist(Linklist): @@ -16,7 +17,19 @@ class AuthorLinklist(Linklist): url_fields = ['website'] +class JournalLinklist(Linklist): + """ Class to let linkcheck app discover fields containing links """ + model = Journal + html_fields = ['description'] + + @classmethod + def filter_callable(cls, objects): + latest = Journal.objects.filter(title=OuterRef('title')).order_by('-version') + return objects.filter(version=Subquery(latest.values('version')[:1])) + + linklists = { 'Books': BookLinklist, 'Authors': AuthorLinklist, + 'Journals': JournalLinklist, } diff --git a/linkcheck/tests/sampleapp/models.py b/linkcheck/tests/sampleapp/models.py 
index c40034f..af87065 100644 --- a/linkcheck/tests/sampleapp/models.py +++ b/linkcheck/tests/sampleapp/models.py @@ -13,3 +13,9 @@ class Author(models.Model): # This model has purposefully no get_absolute_url name = models.CharField(max_length=50) website = models.URLField(blank=True) + + +class Journal(models.Model): + title = models.CharField(max_length=50) + description = models.TextField() + version = models.PositiveIntegerField(default=0) diff --git a/linkcheck/tests/test_linkcheck.py b/linkcheck/tests/test_linkcheck.py index 14784a3..aa4332b 100644 --- a/linkcheck/tests/test_linkcheck.py +++ b/linkcheck/tests/test_linkcheck.py @@ -21,7 +21,7 @@ from linkcheck.models import Link, Url from linkcheck.views import get_jquery_min_js -from .sampleapp.models import Author, Book +from .sampleapp.models import Author, Book, Journal #MOCK addinfurl @@ -425,3 +425,24 @@ class FixtureTestCase(TestCase): def test_fixture(self): self.assertEqual(Book.objects.count(), 1) + + +class FilterCallableTestCase(TestCase): + def test_filter_callable(self): + all_linklists = apps.get_app_config('linkcheck').all_linklists + all_linklists['Journals'].html_fields = [] + Journal.objects.create(title='My Title', description=""" + My description Example""") + Journal.objects.create(title='My Title', version=1, description=""" + My new description Example""") + all_linklists['Journals'].html_fields = ['description'] + # assert there are two versions of the same journal + self.assertEqual(Journal.objects.count(), 2) + # assert command just finds the latest version of same journals + out = StringIO() + call_command('findlinks', stdout=out) + self.assertEqual( + out.getvalue(), + "Finding all new links...\n" + "1 new Url object(s), 1 new Link object(s), 0 Url object(s) deleted\n" + ) From dfa053fbebd832e17de477dd9653472b775d0261 Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Wed, 23 Mar 2022 16:58:37 +0100 Subject: [PATCH 073/188] Changelog and bumped version for 1.9.1 --- CHANGELOG | 5 
+++++ setup.py | 3 +-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 3468e52..7ca5fbd 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,8 @@ +1.9.1 (2022-03-23) + +* Added `Linklist.filter_callable` optional hook to allow for more + flexible link list filtering (Giuliano Mele). + 1.9 (2021-12-23) * Added support for Django 3.2 and 4.0 and removed support for Django < 2.2. diff --git a/setup.py b/setup.py index 01c4327..70d7ed3 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ def read(fname): setup( name='django-linkcheck', - version='1.9', + version='1.9.1', description="A Django app that will analyze and report on links in any " "model that you register with it.", long_description=read('README.rst'), @@ -39,7 +39,6 @@ def read(fname): 'Programming Language :: Python', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3 :: Only', - 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', From 01bbbe96391135b5621a1033d56fe3e169634630 Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Thu, 17 Nov 2022 21:17:53 +0100 Subject: [PATCH 074/188] Upgraded Python/Django versions to current supported state --- .github/workflows/test.yml | 14 +++++++------- CHANGELOG | 5 +++++ linkcheck/tests/test_linkcheck.py | 3 ++- setup.py | 3 ++- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 28167c9..27b3de3 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -12,19 +12,19 @@ jobs: strategy: max-parallel: 5 matrix: - python-version: ['3.6', '3.7', '3.8', '3.9'] - django-version: ['2.2.0', '3.0.0', '3.1.0', '3.2.0', '4.0.0'] + python-version: ['3.7', '3.8', '3.9', '3.10', '3.11'] + django-version: ['3.2', '4.0', '4.1'] exclude: - - python-version: '3.6' - django-version: '4.0.0' - python-version: '3.7' - 
django-version: '4.0.0' + django-version: '4.0' + - python-version: '3.7' + django-version: '4.1' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} diff --git a/CHANGELOG b/CHANGELOG index 7ca5fbd..c9db5d2 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,8 @@ +Unreleased + +* Added support for Python 3.10/3.11 and Django 4.1. +* Dropped support for Python 3.6 and Django < 3.2. + 1.9.1 (2022-03-23) * Added `Linklist.filter_callable` optional hook to allow for more diff --git a/linkcheck/tests/test_linkcheck.py b/linkcheck/tests/test_linkcheck.py index aa4332b..c8051ec 100644 --- a/linkcheck/tests/test_linkcheck.py +++ b/linkcheck/tests/test_linkcheck.py @@ -284,7 +284,8 @@ def test_found_links(self): self.assertEqual(Url.objects.all().count(), 2) self.assertQuerysetEqual( Url.objects.all().order_by('url'), - ["", ""] + ["http://www.example.org/", "http://www.example.org/logo.png"], + transform=lambda obj: obj.url ) def test_empty_url_field(self): diff --git a/setup.py b/setup.py index 70d7ed3..ad3bb89 100644 --- a/setup.py +++ b/setup.py @@ -39,10 +39,11 @@ def read(fname): 'Programming Language :: Python', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3 :: Only', - 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', 'Framework :: Django', ], ) From ad80516268b2c4e4c0a1bbda28e1097e2d7d6690 Mon Sep 17 00:00:00 2001 From: Timo Ludwig Date: Thu, 17 Nov 2022 15:57:18 +0100 Subject: [PATCH 075/188] Provide better error messages for connection errors --- linkcheck/models.py | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 
deletion(-) diff --git a/linkcheck/models.py b/linkcheck/models.py index 4bedb1f..970e2ce 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -4,7 +4,7 @@ from datetime import timedelta import logging import requests -from requests.exceptions import ReadTimeout +from requests.exceptions import ConnectionError, ReadTimeout from requests.models import REDIRECT_STATI from urllib.parse import unquote @@ -295,6 +295,9 @@ def _check_external(self, tested_url, external_recheck_interval): except ReadTimeout: self.message = 'Other Error: The read operation timed out' self.status = False + except ConnectionError as e: + self.message = format_connection_error(e) + self.status = False except Exception as e: self.message = 'Other Error: %s' % e self.status = False @@ -368,3 +371,31 @@ def link_post_delete(sender, instance, **kwargs): url.delete() except Url.DoesNotExist: pass + + +def format_connection_error(e): + """ + Helper function to provide better readable output of connection errors + """ + # If the exception message is wrapped in an "HTTPSConnectionPool", only give the underlying cause + reason = re.search("\(Caused by ([a-zA-Z]+\(.+\))\)", str(e)) + if not reason: + return f"Connection Error: {e}" + reason = reason[1] + # If the underlying cause is a new connection error, provide additional formatting + if reason.startswith("NewConnectionError"): + return format_new_connection_error(reason) + return f"Connection Error: {reason}" + + +def format_new_connection_error(reason): + """ + Helper function to provide better readable output of new connection errors thrown by urllib3 + """ + connection_reason = re.search( + "NewConnectionError\(': (.+)'\)", + reason, + ) + if connection_reason: + return f"New Connection Error: {connection_reason[1]}" + return reason From 080c5aa8373193eb6023bd418c720aa0169871e6 Mon Sep 17 00:00:00 2001 From: Timo Ludwig Date: Thu, 17 Nov 2022 15:58:43 +0100 Subject: [PATCH 076/188] Verify SSL certificates --- CHANGELOG | 1 + 
linkcheck/models.py | 19 ++++++++++++++++++- linkcheck/tests/test_linkcheck.py | 7 +++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index c9db5d2..72b986c 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,6 @@ Unreleased +* Verify SSL certificates (Timo Ludwig, #118) * Added support for Python 3.10/3.11 and Django 4.1. * Dropped support for Python 3.6 and Django < 3.2. diff --git a/linkcheck/models.py b/linkcheck/models.py index 970e2ce..9b4f5c3 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -277,7 +277,7 @@ def _check_external(self, tested_url, external_recheck_interval): url = iri_to_uri(url) request_params = { - 'verify': False, 'allow_redirects': True, + 'allow_redirects': True, 'headers': {'User-Agent' : "http://%s Linkchecker" % settings.SITE_DOMAIN}, 'timeout': LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT, } @@ -385,6 +385,9 @@ def format_connection_error(e): # If the underlying cause is a new connection error, provide additional formatting if reason.startswith("NewConnectionError"): return format_new_connection_error(reason) + # If the underlying cause is an SSL error, provide additional formatting + if reason.startswith("SSLError"): + return format_ssl_error(reason) return f"Connection Error: {reason}" @@ -399,3 +402,17 @@ def format_new_connection_error(reason): if connection_reason: return f"New Connection Error: {connection_reason[1]}" return reason + + +def format_ssl_error(reason): + """ + Helper function to provide better readable output of SSL errors thrown by urllib3 + """ + ssl_reason = re.search("SSLError\([a-zA-Z]+\((.+)\)\)", reason) + if ssl_reason: + # If the reason lies withing the ssl c library, hide additional debug output + ssl_c_reason = re.search("1, '\[SSL: [A-Z\d_]+\] (.+) \(_ssl\.c:\d+\)'", ssl_reason[1]) + if ssl_c_reason: + return f"SSL Error: {ssl_c_reason[1]}" + return f"SSL Error: {ssl_reason[1]}" + return reason diff --git a/linkcheck/tests/test_linkcheck.py 
b/linkcheck/tests/test_linkcheck.py index c8051ec..8afaa47 100644 --- a/linkcheck/tests/test_linkcheck.py +++ b/linkcheck/tests/test_linkcheck.py @@ -173,6 +173,13 @@ def test_external_check_200(self): self.assertEqual(uv.message, '200 OK') self.assertEqual(uv.redirect_to, '') + def test_external_check_200_missing_cert(self): + uv = Url(url="%s/http/200/" % self.live_server_url.replace("http://", "https://"), still_exists=True) + uv.check_url() + self.assertEqual(uv.status, False) + self.assertEqual(uv.message, 'SSL Error: wrong version number') + self.assertEqual(uv.redirect_to, '') + def test_external_check_200_utf8(self): uv = Url(url="%s/http/200/r%%C3%%BCckmeldung/" % self.live_server_url, still_exists=True) uv.check_url() From 9b3c2267e3db1604dbbc33bf6aa4f0f25574aa8d Mon Sep 17 00:00:00 2001 From: Timo Ludwig Date: Sun, 13 Nov 2022 03:14:14 +0100 Subject: [PATCH 077/188] Add test case for URLs exceeding max length This should fail on the current master branch and succeed once #115 is fixed. 
--- linkcheck/tests/test_linkcheck.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/linkcheck/tests/test_linkcheck.py b/linkcheck/tests/test_linkcheck.py index 8afaa47..04d445d 100644 --- a/linkcheck/tests/test_linkcheck.py +++ b/linkcheck/tests/test_linkcheck.py @@ -19,6 +19,7 @@ enable_listeners, disable_listeners, register_listeners, unregister_listeners, ) from linkcheck.models import Link, Url +from linkcheck.linkcheck_settings import MAX_URL_LENGTH from linkcheck.views import get_jquery_min_js from .sampleapp.models import Author, Book, Journal @@ -295,6 +296,19 @@ def test_found_links(self): transform=lambda obj: obj.url ) + def test_urls_exceeding_max_length(self): + self.assertEqual(Url.objects.all().count(), 0) + with self.assertLogs(logger="linkcheck", level="WARN") as cm: + Book.objects.create(title='My Title', description=f""" + Here's a link: Example, + and here's a url exceeding the max length: logo""") + # We skip urls which are too long because we can't store them in the database + self.assertIn( + f"WARNING:linkcheck.listeners:URL exceeding max length will be skipped: http://www.example.org/{MAX_URL_LENGTH * 'X'}", + cm.output + ) + self.assertEqual(Url.objects.all().count(), 1) + def test_empty_url_field(self): """ Test that URLField empty content is excluded depending on ignore_empty list. From 01a156e474f42953cc87bb23c3152563aa9c7eba Mon Sep 17 00:00:00 2001 From: Timo Ludwig Date: Wed, 16 Nov 2022 14:26:36 +0100 Subject: [PATCH 078/188] Skip URLs exceeding max length (fixes #115) --- CHANGELOG | 2 ++ linkcheck/listeners.py | 11 +++++++++++ linkcheck/utils.py | 9 +++++++-- 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 72b986c..7c8467d 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,7 @@ Unreleased +* Ignore Urls longer than `MAX_URL_LENGTH` in signal listeners + (Timo Ludwig, #115) * Verify SSL certificates (Timo Ludwig, #118) * Added support for Python 3.10/3.11 and Django 4.1. 
* Dropped support for Python 3.6 and Django < 3.2. diff --git a/linkcheck/listeners.py b/linkcheck/listeners.py index b5084fb..05e238c 100644 --- a/linkcheck/listeners.py +++ b/linkcheck/listeners.py @@ -1,3 +1,4 @@ +import logging import sys import time from contextlib import contextmanager @@ -9,9 +10,13 @@ from . import filebrowser from . import update_lock +from .linkcheck_settings import MAX_URL_LENGTH from linkcheck.models import Url, Link +logger = logging.getLogger(__name__) + + tasks_queue = LifoQueue() worker_running = False tests_running = len(sys.argv) > 1 and sys.argv[1] == 'test' or sys.argv[0].endswith('runtests.py') @@ -77,6 +82,12 @@ def do_check_instance_links(sender, instance, wait=False): if url.startswith('#'): internal_hash = url url = instance.get_absolute_url() + url + + if len(url) > MAX_URL_LENGTH: + # We cannot handle url longer than MAX_URL_LENGTH at the moment + logger.warning('URL exceeding max length will be skipped: %s', url) + continue + u, created = Url.objects.get_or_create(url=url) l, created = Link.objects.get_or_create(url=u, field=link[0], text=link[1], content_type=content_type, object_id=instance.pk) new_links.append(l.id) diff --git a/linkcheck/utils.py b/linkcheck/utils.py index f3b2711..b961c66 100644 --- a/linkcheck/utils.py +++ b/linkcheck/utils.py @@ -1,3 +1,4 @@ +import logging from django.apps import apps from django.db import models from django.test.client import ClientHandler @@ -9,6 +10,9 @@ from .linkcheck_settings import MAX_URL_LENGTH, HTML_FIELD_CLASSES, IMAGE_FIELD_CLASSES, URL_FIELD_CLASSES +logger = logging.getLogger(__name__) + + class LinkCheckHandler(ClientHandler): # Customize the ClientHandler to allow us removing some middlewares @@ -17,7 +21,7 @@ def load_middleware(self): self.ignore_keywords = ['reversion.middleware','MaintenanceModeMiddleware'] super().load_middleware() new_request_middleware = [] - + #############################_request_middleware################################# # 
_request_middleware is removed in newer django. if getattr(self, '_request_middleware', None): @@ -30,7 +34,7 @@ def load_middleware(self): if not ignored: new_request_middleware.append(method) self._request_middleware = new_request_middleware - + #############################_view_middleware################################# new_view_middleware = [] for method in self._view_middleware: @@ -119,6 +123,7 @@ def update_urls(urls, content_type, object_id): if len(url) > MAX_URL_LENGTH: # We cannot handle url longer than MAX_URL_LENGTH at the moment + logger.warning('URL exceeding max length will be skipped: %s', url) continue url, url_created = Url.objects.get_or_create(url=url) From d458385bbead3a70552575407f4bcb60b68c0651 Mon Sep 17 00:00:00 2001 From: Sven Seeberg Date: Sat, 19 Nov 2022 18:27:26 +0100 Subject: [PATCH 079/188] Catch all errors in linkcheck worker, fixes #117 Co-authored-by: Timo Ludwig Co-authored-by: Claude Paroz --- CHANGELOG | 2 ++ linkcheck/listeners.py | 23 ++++++++++++++++++----- linkcheck/tests/test_linkcheck.py | 28 +++++++++++++++++++++++++++- 3 files changed, 47 insertions(+), 6 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 7c8467d..03a7b87 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,7 @@ Unreleased +* Avoid crash when unexpected error in signal listener occurs + (Sven Seeberg, #117) * Ignore Urls longer than `MAX_URL_LENGTH` in signal listeners (Timo Ludwig, #115) * Verify SSL certificates (Timo Ludwig, #118) diff --git a/linkcheck/listeners.py b/linkcheck/listeners.py index 05e238c..34d7390 100644 --- a/linkcheck/listeners.py +++ b/linkcheck/listeners.py @@ -2,7 +2,7 @@ import sys import time from contextlib import contextmanager -from queue import LifoQueue +from queue import Empty, LifoQueue from threading import Thread from django.apps import apps @@ -13,7 +13,6 @@ from .linkcheck_settings import MAX_URL_LENGTH from linkcheck.models import Url, Link - logger = logging.getLogger(__name__) @@ -22,11 +21,25 @@ tests_running 
= len(sys.argv) > 1 and sys.argv[1] == 'test' or sys.argv[0].endswith('runtests.py') -def linkcheck_worker(): +def linkcheck_worker(block=True): global worker_running while tasks_queue.not_empty: - task = tasks_queue.get() - task['target'](*task['args'], **task['kwargs']) + try: + task = tasks_queue.get(block=block) + except Empty: + break + # An error in any task should not stop the worker from continuing with the queue + try: + task['target'](*task['args'], **task['kwargs']) + except Exception as e: + logger.exception( + "%s while running %s with args=%r and kwargs=%r: %s", + type(e).__name__, + task['target'].__name__, + task['args'], + task['kwargs'], + e + ) tasks_queue.task_done() worker_running = False diff --git a/linkcheck/tests/test_linkcheck.py b/linkcheck/tests/test_linkcheck.py index 04d445d..5ffea39 100644 --- a/linkcheck/tests/test_linkcheck.py +++ b/linkcheck/tests/test_linkcheck.py @@ -16,7 +16,8 @@ from django.urls import reverse from linkcheck.listeners import ( - enable_listeners, disable_listeners, register_listeners, unregister_listeners, + enable_listeners, disable_listeners, linkcheck_worker, register_listeners, + tasks_queue, unregister_listeners, ) from linkcheck.models import Link, Url from linkcheck.linkcheck_settings import MAX_URL_LENGTH @@ -405,6 +406,31 @@ def test_enable_listeners(self): register_listeners() +class QueueTests(TestCase): + def test_queue_handling_continue_on_task_crash(self): + assert tasks_queue.empty() is True + + def raising(): + raise RuntimeError("Failing task") + + def passing(): + pass + + for func in (raising, passing): + tasks_queue.put({ + 'target': func, + 'args': (), + 'kwargs': {}, + }) + with self.assertLogs() as cm: + linkcheck_worker(block=False) + self.assertEqual( + cm.output[0].split('\n')[0], + 'ERROR:linkcheck.listeners:RuntimeError while running raising with ' + 'args=() and kwargs={}: Failing task' + ) + + class ViewTestCase(TestCase): def setUp(self): User.objects.create_superuser('admin', 
'admin@example.org', 'password') From ba3a119143eb042cf9abe4ba99f681eb170d6541 Mon Sep 17 00:00:00 2001 From: Timo Ludwig Date: Fri, 18 Nov 2022 09:10:42 +0100 Subject: [PATCH 080/188] Delete outdated objects on findlinks command --- CHANGELOG | 2 + linkcheck/management/commands/findlinks.py | 13 +++-- linkcheck/tests/test_linkcheck.py | 64 ++++++++++++++++------ linkcheck/utils.py | 64 ++++++++++++++++------ 4 files changed, 104 insertions(+), 39 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 03a7b87..7e0fde9 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,7 @@ Unreleased +* Delete outdated `Url` and `Link` objects when + running `findlinks` command (Timo Ludwig, #101) * Avoid crash when unexpected error in signal listener occurs (Sven Seeberg, #117) * Ignore Urls longer than `MAX_URL_LENGTH` in signal listeners diff --git a/linkcheck/management/commands/findlinks.py b/linkcheck/management/commands/findlinks.py index 15dace9..316a608 100644 --- a/linkcheck/management/commands/findlinks.py +++ b/linkcheck/management/commands/findlinks.py @@ -5,10 +5,13 @@ class Command(BaseCommand): - help = "Goes through all models registered with Linkcheck and records any links found" + help = "Goes through all models registered with Linkcheck, records any new links found and removes all outdated links" def handle(self, *args, **options): - self.stdout.write("Finding all new links...") - results = find_all_links() - return ("%(urls_created)s new Url object(s), %(links_created)s new Link object(s), " - "%(urls_deleted)s Url object(s) deleted") % results + self.stdout.write("Updating all links...") + return "\n".join( + [ + f"{model.capitalize()}: {', '.join([f'{count} {label}' for label, count in data.items()])}" + for model, data in find_all_links().items() + ] + ) diff --git a/linkcheck/tests/test_linkcheck.py b/linkcheck/tests/test_linkcheck.py index 5ffea39..218f3dd 100644 --- a/linkcheck/tests/test_linkcheck.py +++ b/linkcheck/tests/test_linkcheck.py @@ -327,18 
+327,40 @@ def test_empty_url_field(self): self.assertEqual(Url.objects.all().count(), 2) def test_findlinks_command(self): - all_linklists = apps.get_app_config('linkcheck').all_linklists - all_linklists['Authors'].url_fields = [] - Author.objects.create(name="John Smith", website="http://www.example.org/smith") - all_linklists['Authors'].url_fields = ['website'] - - out = StringIO() - call_command('findlinks', stdout=out) - self.assertEqual( - out.getvalue(), - "Finding all new links...\n" - "1 new Url object(s), 1 new Link object(s), 0 Url object(s) deleted\n" - ) + # Disable listeners to only check the management command + with disable_listeners(): + Author.objects.create(name="John Smith", website="https://www.example.org/smith") + self.assertEqual( + findlinks(), + "Updating all links...\n" + "Urls: 1 created, 0 deleted, 0 unchanged\n" + "Links: 1 created, 0 deleted, 0 unchanged\n" + ) + Author.objects.create(name="John Doe", website="https://www.example.org/doe") + Book.objects.create( + title='My Title', + description="My fav author: John Doe" + ) + self.assertEqual( + findlinks(), + "Updating all links...\n" + "Urls: 1 created, 0 deleted, 1 unchanged\n" + "Links: 2 created, 0 deleted, 1 unchanged\n" + ) + Author.objects.get(name="John Doe").delete() + self.assertEqual( + findlinks(), + "Updating all links...\n" + "Urls: 0 created, 0 deleted, 2 unchanged\n" + "Links: 0 created, 1 deleted, 2 unchanged\n" + ) + Book.objects.first().delete() + self.assertEqual( + findlinks(), + "Updating all links...\n" + "Urls: 0 created, 1 deleted, 1 unchanged\n" + "Links: 0 created, 1 deleted, 1 unchanged\n" + ) class ObjectsUpdateTestCase(TestCase): @@ -487,10 +509,18 @@ def test_filter_callable(self): # assert there are two versions of the same journal self.assertEqual(Journal.objects.count(), 2) # assert command just finds the latest version of same journals - out = StringIO() - call_command('findlinks', stdout=out) self.assertEqual( - 
out.getvalue(), - "Finding all new links...\n" - "1 new Url object(s), 1 new Link object(s), 0 Url object(s) deleted\n" + findlinks(), + "Updating all links...\n" + "Urls: 1 created, 0 deleted, 0 unchanged\n" + "Links: 1 created, 0 deleted, 0 unchanged\n" ) + + +def findlinks(): + """ + Helper function for running the findlinks command and checking its output + """ + out = StringIO() + call_command('findlinks', stdout=out) + return out.getvalue() diff --git a/linkcheck/utils.py b/linkcheck/utils.py index b961c66..2d3160a 100644 --- a/linkcheck/utils.py +++ b/linkcheck/utils.py @@ -113,7 +113,9 @@ def update_urls(urls, content_type, object_id): # Structure of urls param is [(field, link text, url), ... ] - new_urls = new_links = 0 + urls_created = links_created = 0 + new_url_ids = set() + new_link_ids = set() for field, link_text, url in urls: @@ -136,12 +138,24 @@ def update_urls(urls, content_type, object_id): object_id=object_id, ) - url.still_exists = True - url.save() - new_urls += url_created - new_links += link_created + # Keep track of how many objects were created + urls_created += url_created + links_created += link_created + + # Keep track of object ids (no matter if created or existing) + new_url_ids.add(url.id) + new_link_ids.add(link.id) - return new_urls, new_links + return { + "urls": { + "created": urls_created, + "ids": new_url_ids, + }, + "links": { + "created": links_created, + "ids": new_link_ids, + }, + } def find_all_links(linklists=None): @@ -149,10 +163,12 @@ def find_all_links(linklists=None): if linklists is None: linklists = apps.get_app_config('linkcheck').all_linklists - all_links_dict = {} urls_created = links_created = 0 + new_url_ids = set() + new_link_ids = set() - Url.objects.all().update(still_exists=False) + urls_before = Url.objects.count() + links_before = Link.objects.count() for linklist_name, linklist_cls in linklists.items(): @@ -163,19 +179,33 @@ def find_all_links(linklists=None): object_id = linklist['object'].id urls = 
linklist['urls'] + linklist['images'] if urls: - new_urls, new_links = update_urls(urls, content_type, object_id) - urls_created += new_urls - links_created += new_links - all_links_dict[linklist_name] = linklists + new = update_urls(urls, content_type, object_id) + + urls_created += new["urls"]["created"] + links_created += new["links"]["created"] + + new_url_ids.update(new["urls"]["ids"]) + new_link_ids.update(new["links"]["ids"]) - deleted = Url.objects.filter(still_exists=False).count() + # Delete all urls and links which are no longer part of the link lists + Url.objects.all().exclude(id__in=new_url_ids).delete() + Link.objects.all().exclude(id__in=new_link_ids).delete() - Url.objects.filter(still_exists=False).delete() + # Calculate diff + urls_after = Url.objects.count() + links_after = Link.objects.count() return { - 'urls_deleted': deleted, - 'urls_created': urls_created, - 'links_created': links_created, + "urls": { + "created": urls_created, + "deleted": urls_before + urls_created - urls_after, + "unchanged": urls_after - urls_created, + }, + "links": { + "created": links_created, + "deleted": links_before + links_created - links_after, + "unchanged": links_after - links_created, + }, } From 93e5521d210123efc40b7172cca9f47944ba14b8 Mon Sep 17 00:00:00 2001 From: Timo Ludwig Date: Fri, 18 Nov 2022 09:16:58 +0100 Subject: [PATCH 081/188] Remove unused field `still_exists` from `Url` model --- CHANGELOG | 1 + linkcheck/listeners.py | 1 - .../0004_remove_url_still_exists.py | 16 ++++++ linkcheck/models.py | 1 - linkcheck/tests/test_linkcheck.py | 50 +++++++++---------- linkcheck/utils.py | 2 +- 6 files changed, 43 insertions(+), 28 deletions(-) create mode 100644 linkcheck/migrations/0004_remove_url_still_exists.py diff --git a/CHANGELOG b/CHANGELOG index 7e0fde9..28b154c 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,6 @@ Unreleased +* Remove unused field `still_exists` from `Url` model * Delete outdated `Url` and `Link` objects when running `findlinks` 
command (Timo Ludwig, #101) * Avoid crash when unexpected error in signal listener occurs diff --git a/linkcheck/listeners.py b/linkcheck/listeners.py index 34d7390..1a84e9e 100644 --- a/linkcheck/listeners.py +++ b/linkcheck/listeners.py @@ -104,7 +104,6 @@ def do_check_instance_links(sender, instance, wait=False): u, created = Url.objects.get_or_create(url=url) l, created = Link.objects.get_or_create(url=u, field=link[0], text=link[1], content_type=content_type, object_id=instance.pk) new_links.append(l.id) - u.still_exists = True if internal_hash: setattr(u, '_internal_hash', internal_hash) setattr(u, '_instance', instance) diff --git a/linkcheck/migrations/0004_remove_url_still_exists.py b/linkcheck/migrations/0004_remove_url_still_exists.py new file mode 100644 index 0000000..3fe459c --- /dev/null +++ b/linkcheck/migrations/0004_remove_url_still_exists.py @@ -0,0 +1,16 @@ +# -*- coding: utf-8 -*- +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('linkcheck', '0003_redirect_to_as_textfield'), + ] + + operations = [ + migrations.RemoveField( + model_name='url', + name='still_exists', + ), + ] diff --git a/linkcheck/models.py b/linkcheck/models.py index 9b4f5c3..d997d40 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -64,7 +64,6 @@ class Url(models.Model): last_checked = models.DateTimeField(blank=True, null=True) status = models.BooleanField(null=True) message = models.CharField(max_length=1024, blank=True, null=True) - still_exists = models.BooleanField(default=False) redirect_to = models.TextField(blank=True) @property diff --git a/linkcheck/tests/test_linkcheck.py b/linkcheck/tests/test_linkcheck.py index 218f3dd..c80578b 100644 --- a/linkcheck/tests/test_linkcheck.py +++ b/linkcheck/tests/test_linkcheck.py @@ -73,49 +73,49 @@ def setUp(self): request.urlopen = mock_urlopen def test_internal_check_mailto(self): - uv = Url(url="mailto:nobody", still_exists=True) + uv = Url(url="mailto:nobody") 
uv.check_url() self.assertEqual(uv.status, None) self.assertEqual(uv.message, 'Email link (not automatically checked)') def test_internal_check_blank(self): - uv = Url(url="", still_exists=True) + uv = Url(url="") uv.check_url() self.assertEqual(uv.status, False) self.assertEqual(uv.message, 'Empty link') def test_internal_check_anchor(self): - uv = Url(url="#some_anchor", still_exists=True) + uv = Url(url="#some_anchor") uv.check_url() self.assertEqual(uv.status, None) self.assertEqual(uv.message, 'Link to within the same page (not automatically checked)') def test_internal_check_view_redirect(self): - uv = Url(url="/admin/linkcheck", still_exists=True) + uv = Url(url="/admin/linkcheck") uv.check_url() self.assertEqual(uv.status, True) self.assertIn(uv.message, ['This link redirects: code %s (Working redirect)' % status for status in [301, 302]] ) - uv = Url(url="/http/brokenredirect/", still_exists=True) + uv = Url(url="/http/brokenredirect/") uv.check_url() self.assertEqual(uv.status, False) self.assertEqual(uv.message, 'This link redirects: code 302 (Broken redirect)') def test_internal_check_found(self): - uv = Url(url="/public/", still_exists=True) + uv = Url(url="/public/") uv.check_url() self.assertEqual(uv.status, True) self.assertEqual(uv.message, 'Working internal link') def test_internal_check_broken_internal_link(self): - uv = Url(url="/broken/internal/link", still_exists=True) + uv = Url(url="/broken/internal/link") uv.check_url() self.assertEqual(uv.status, False) self.assertEqual(uv.message, 'Broken internal link') def test_internal_check_invalid_url(http://github.com/self): - uv = Url(url="invalid/url", still_exists=True) + uv = Url(url="invalid/url") uv.check_url() self.assertEqual(uv.status, False) self.assertEqual(uv.message, 'Invalid URL') @@ -123,7 +123,7 @@ def test_internal_check_invalid_url(http://github.com/self): def test_same_page_anchor(self): # TODO Make this test pass - #uv = Url(url="#anchor", still_exists=True) + 
#uv = Url(url="#anchor") #uv.check_url() #self.assertEqual(uv.status, None) #self.assertEqual(uv.message, "") @@ -138,13 +138,13 @@ def tearDown(self): settings.MEDIA_ROOT = self.old_media_root def test_internal_check_media_missing(self): - uv = Url(url="/media/not_found", still_exists=True) + uv = Url(url="/media/not_found") uv.check_url() self.assertEqual(uv.status, False) self.assertEqual(uv.message, 'Missing Document') def test_internal_check_media_found(self): - uv = Url(url="/media/found", still_exists=True) + uv = Url(url="/media/found") uv.check_url() self.assertEqual(uv.status, True) self.assertEqual(uv.message, 'Working file link') @@ -153,12 +153,12 @@ def test_internal_check_media_utf8(self): media_file = os.path.join(os.path.dirname(__file__), 'media', 'rückmeldung') open(media_file, 'a').close() self.addCleanup(os.remove, media_file) - uv = Url(url="/media/r%C3%BCckmeldung", still_exists=True) + uv = Url(url="/media/r%C3%BCckmeldung") uv.check_url() self.assertEqual(uv.status, True) self.assertEqual(uv.message, 'Working file link') # Also when the url is not encoded - uv = Url(url="/media/rückmeldung", still_exists=True) + uv = Url(url="/media/rückmeldung") uv.check_url() self.assertEqual(uv.status, True) self.assertEqual(uv.message, 'Working file link') @@ -169,77 +169,77 @@ def test_internal_check_media_utf8(self): @override_settings(SITE_DOMAIN='example.com') class ExternalCheckTestCase(LiveServerTestCase): def test_external_check_200(self): - uv = Url(url="%s/http/200/" % self.live_server_url, still_exists=True) + uv = Url(url="%s/http/200/" % self.live_server_url) uv.check_url() self.assertEqual(uv.status, True) self.assertEqual(uv.message, '200 OK') self.assertEqual(uv.redirect_to, '') def test_external_check_200_missing_cert(self): - uv = Url(url="%s/http/200/" % self.live_server_url.replace("http://", "https://"), still_exists=True) + uv = Url(url="%s/http/200/" % self.live_server_url.replace("http://", "https://")) uv.check_url() 
self.assertEqual(uv.status, False) self.assertEqual(uv.message, 'SSL Error: wrong version number') self.assertEqual(uv.redirect_to, '') def test_external_check_200_utf8(self): - uv = Url(url="%s/http/200/r%%C3%%BCckmeldung/" % self.live_server_url, still_exists=True) + uv = Url(url="%s/http/200/r%%C3%%BCckmeldung/" % self.live_server_url) uv.check_url() self.assertEqual(uv.status, True) self.assertEqual(uv.message, '200 OK') # Also when the url is not encoded - uv = Url(url="%s/http/200/rückmeldung/" % self.live_server_url, still_exists=True) + uv = Url(url="%s/http/200/rückmeldung/" % self.live_server_url) uv.check_url() self.assertEqual(uv.status, True) self.assertEqual(uv.message, '200 OK') def test_external_check_301(self): - uv = Url(url="%s/http/301/" % self.live_server_url, still_exists=True) + uv = Url(url="%s/http/301/" % self.live_server_url) uv.check_url() self.assertEqual(uv.status, False) self.assertEqual(uv.message.lower(), '301 moved permanently') self.assertEqual(uv.redirect_to, '') def test_external_check_301_followed(self): - uv = Url(url="%s/http/redirect/301/" % self.live_server_url, still_exists=True) + uv = Url(url="%s/http/redirect/301/" % self.live_server_url) uv.check_url() self.assertEqual(uv.status, True) self.assertEqual(uv.message, '301 Moved Permanently') self.assertEqual(uv.redirect_to, '%s/http/200/' % self.live_server_url) def test_external_check_302_followed(self): - uv = Url(url="%s/http/redirect/302/" % self.live_server_url, still_exists=True) + uv = Url(url="%s/http/redirect/302/" % self.live_server_url) uv.check_url() self.assertEqual(uv.status, True) self.assertEqual(uv.message, '302 Found') self.assertEqual(uv.redirect_to, '%s/http/200/' % self.live_server_url) def test_external_check_404(self): - uv = Url(url="%s/whatever/" % self.live_server_url, still_exists=True) + uv = Url(url="%s/whatever/" % self.live_server_url) uv.check_url() self.assertEqual(uv.status, False) self.assertEqual(uv.message.lower(), '404 not found') def 
test_external_check_redirect_final_404(self): - uv = Url(url="%s/http/redirect_to_404/" % self.live_server_url, still_exists=True) + uv = Url(url="%s/http/redirect_to_404/" % self.live_server_url) uv.check_url() self.assertEqual(uv.status, False) self.assertEqual(uv.message.lower(), '404 not found') def test_external_check_get_only(self): # An URL that allows GET but not HEAD, linkcheck should fallback on GET. - uv = Url(url="%s/http/getonly/405/" % self.live_server_url, still_exists=True) + uv = Url(url="%s/http/getonly/405/" % self.live_server_url) uv.check_url() self.assertEqual(uv.status, True) self.assertEqual(uv.message, '200 OK') # Same test with other 40x error - uv = Url(url="%s/http/getonly/400/" % self.live_server_url, still_exists=True) + uv = Url(url="%s/http/getonly/400/" % self.live_server_url) uv.check_url() self.assertEqual(uv.status, True) self.assertEqual(uv.message, '200 OK') def test_external_check_timedout(self): - uv = Url(url="%s/timeout/" % self.live_server_url, still_exists=True) + uv = Url(url="%s/timeout/" % self.live_server_url) uv.check_url() self.assertEqual(uv.status, False) self.assertEqual(uv.message, 'Other Error: The read operation timed out') diff --git a/linkcheck/utils.py b/linkcheck/utils.py index 2d3160a..26ea687 100644 --- a/linkcheck/utils.py +++ b/linkcheck/utils.py @@ -92,7 +92,7 @@ def check_links(external_recheck_interval=10080, limit=-1, check_internal=True, Return the number of links effectively checked. 
""" - urls = Url.objects.filter(still_exists=True) + urls = Url.objects.all() # An optimization for when check_internal is False if not check_internal: From d0e2d20d2475644ba0de8a7891f7d414ccf15805 Mon Sep 17 00:00:00 2001 From: Timo Ludwig Date: Sat, 19 Nov 2022 19:34:32 +0100 Subject: [PATCH 082/188] Improve representation of models --- linkcheck/models.py | 9 +++++++++ linkcheck/tests/test_linkcheck.py | 25 +++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/linkcheck/models.py b/linkcheck/models.py index d997d40..a9cf8d9 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -102,6 +102,9 @@ def colour(self): def __str__(self): return self.url + def __repr__(self): + return f"" + @property def external(self): return EXTERNAL_REGEX.match(self.url) @@ -359,6 +362,12 @@ def display_url(http://github.com/self): return '#' + anchor_part return self.url.url + def __str__(self): + return f"{self.url.url} ({self.content_object})" + + def __repr__(self): + return f"" + def link_post_delete(sender, instance, **kwargs): try: diff --git a/linkcheck/tests/test_linkcheck.py b/linkcheck/tests/test_linkcheck.py index c80578b..83cf15c 100644 --- a/linkcheck/tests/test_linkcheck.py +++ b/linkcheck/tests/test_linkcheck.py @@ -245,6 +245,31 @@ def test_external_check_timedout(self): self.assertEqual(uv.message, 'Other Error: The read operation timed out') +class ModelTestCase(TestCase): + + def test_str(self): + Author.objects.create(name="John Smith", website="http://www.example.org/smith") + self.assertEqual( + str(Url.objects.first()), + "http://www.example.org/smith", + ) + self.assertEqual( + str(Link.objects.first()), + "http://www.example.org/smith%20(Author%20object%20(1))", + ) + + def test_repr(self): + Author.objects.create(name="John Smith", website="http://www.example.org/smith") + self.assertEqual( + repr(Url.objects.first()), + "", + ) + self.assertEqual( + repr(Link.objects.first()), + ", 
source: )>", + ) + + class ChecklinksTestCase(TestCase): def setUp(self): request.urlopen = mock_urlopen From 82bd62bffede83164831135afec7f72feb4dbabd Mon Sep 17 00:00:00 2001 From: Timo Ludwig Date: Sat, 19 Nov 2022 23:03:42 +0100 Subject: [PATCH 083/188] Remove legacy code --- linkcheck/__init__.py | 3 -- linkcheck/tests/test_linkcheck.py | 50 ------------------------------- 2 files changed, 53 deletions(-) diff --git a/linkcheck/__init__.py b/linkcheck/__init__.py index d85f4f9..b912a91 100644 --- a/linkcheck/__init__.py +++ b/linkcheck/__init__.py @@ -6,9 +6,6 @@ # A global lock, showing whether linkcheck is busy update_lock = threading.Lock() -if django.VERSION <= (3, 2): - default_app_config = 'linkcheck.apps.LinkcheckConfig' - class Lister(HTMLParser): diff --git a/linkcheck/tests/test_linkcheck.py b/linkcheck/tests/test_linkcheck.py index 83cf15c..a3ea5e0 100644 --- a/linkcheck/tests/test_linkcheck.py +++ b/linkcheck/tests/test_linkcheck.py @@ -1,8 +1,5 @@ from datetime import datetime, timedelta from io import StringIO -from unittest import skipIf -from urllib import request -from urllib.error import HTTPError import os import re @@ -26,52 +23,9 @@ from .sampleapp.models import Author, Book, Journal -#MOCK addinfurl -class addinfourl(): - """class to add info() and geturl(http://github.com/url=) methods to an open file.""" - - def __init__(self, url, code, msg): - self.headers = None - self.url = url - self.code = code - self.msg = msg - - def info(self): - return self.headers - - def getcode(self): - return self.code - - def geturl(http://github.com/self): - return self.url - -# -# Mock Method so test can run independently -# - -def mock_urlopen(url, data=None, **kwargs): - msg_dict = {'301': "Moved Permanently", '404': 'Not Found', '200': 'OK'} - - code = '404' - msg = msg_dict.get(code) - - m = re.search("([0-9]*)$", url) - if m: - code = m.group(0) - msg = msg_dict.get(code, 'Something Happened') - if code == "200": - return 
addinfourl(http://github.com/url,%20code,%20msg) - - raise HTTPError(url, code, msg, None, None) - - @override_settings(ROOT_URLCONF='linkcheck.tests.urls') class InternalCheckTestCase(TestCase): - def setUp(self): - #replace urllib2.urlopen with mock method - request.urlopen = mock_urlopen - def test_internal_check_mailto(self): uv = Url(url="mailto:nobody") uv.check_url() @@ -164,8 +118,6 @@ def test_internal_check_media_utf8(self): self.assertEqual(uv.message, 'Working file link') -# See https://code.djangoproject.com/ticket/29849 (fixed in Django 2.1+) -@skipIf(django.VERSION[:2]==(2, 0), 'LiveServerTestCase is broken on Django 2.0.x') @override_settings(SITE_DOMAIN='example.com') class ExternalCheckTestCase(LiveServerTestCase): def test_external_check_200(self): @@ -271,8 +223,6 @@ def test_repr(self): class ChecklinksTestCase(TestCase): - def setUp(self): - request.urlopen = mock_urlopen def test_checklinks_command(self): Book.objects.create(title='My Title', description=""" From 8783a31b084d8c01100048aa5dee7e4717845884 Mon Sep 17 00:00:00 2001 From: Timo Ludwig Date: Sat, 19 Nov 2022 23:35:30 +0100 Subject: [PATCH 084/188] Add Django framework classifiers --- setup.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.py b/setup.py index ad3bb89..02c80d2 100644 --- a/setup.py +++ b/setup.py @@ -45,5 +45,8 @@ def read(fname): 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', 'Framework :: Django', + "Framework :: Django :: 3.2", + "Framework :: Django :: 4.0", + "Framework :: Django :: 4.1", ], ) From 918d2adfb0db1570040854336fff02e16cac59cc Mon Sep 17 00:00:00 2001 From: Timo Ludwig Date: Sun, 20 Nov 2022 00:15:48 +0100 Subject: [PATCH 085/188] Use setuptools instead of distutils --- setup.py | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/setup.py b/setup.py index 02c80d2..db8aadf 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,6 @@ import os -from distutils.core 
import setup + +from setuptools import find_packages, setup def read(fname): return open(os.path.join(os.path.dirname(__file__), fname)).read() @@ -14,21 +15,7 @@ def read(fname): author_email='andy@andybak.net', license='BSD', url='https://github.com/DjangoAdminHackers/django-linkcheck', - packages=[ - 'linkcheck', - 'linkcheck.management', - 'linkcheck.management.commands', - 'linkcheck.migrations', - 'linkcheck.tests', - 'linkcheck.tests.sampleapp', - ], - package_data={ - 'linkcheck': [ - 'templates/linkcheck/*.html', - 'templates/linkcheck/*.xhtml', - 'tests/media/*', - ] - }, + include_package_data=True, install_requires=['requests'], classifiers=[ 'Development Status :: 5 - Production/Stable', From 8494dd30b574c8b92c9a35e87bd27234ea9adfb8 Mon Sep 17 00:00:00 2001 From: Timo Ludwig Date: Sun, 20 Nov 2022 12:21:17 +0100 Subject: [PATCH 086/188] Add tests for hash anchors --- linkcheck/tests/sampleapp/views.py | 4 ++++ linkcheck/tests/test_linkcheck.py | 34 ++++++++++++++++++++++-------- linkcheck/tests/urls.py | 1 + 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/linkcheck/tests/sampleapp/views.py b/linkcheck/tests/sampleapp/views.py index 60821a9..7376e34 100644 --- a/linkcheck/tests/sampleapp/views.py +++ b/linkcheck/tests/sampleapp/views.py @@ -22,3 +22,7 @@ def http_redirect_to_404(request): def timeout(request): time.sleep(2) return HttpResponse("") + + +def http_response_with_anchor(request): + return HttpResponse("

Anchor

") diff --git a/linkcheck/tests/test_linkcheck.py b/linkcheck/tests/test_linkcheck.py index a3ea5e0..16624be 100644 --- a/linkcheck/tests/test_linkcheck.py +++ b/linkcheck/tests/test_linkcheck.py @@ -38,12 +38,24 @@ def test_internal_check_blank(self): self.assertEqual(uv.status, False) self.assertEqual(uv.message, 'Empty link') - def test_internal_check_anchor(self): + def test_same_page_anchor(self): uv = Url(url="#some_anchor") uv.check_url() self.assertEqual(uv.status, None) self.assertEqual(uv.message, 'Link to within the same page (not automatically checked)') + def test_working_internal_anchor(self): + uv = Url(url="/http/anchor/#anchor") + uv.check_url() + self.assertEqual(uv.status, True) + self.assertEqual(uv.message, "Working internal hash anchor") + + def test_broken_internal_anchor(self): + uv = Url(url="/http/anchor/#broken-anchor") + uv.check_url() + self.assertEqual(uv.status, False) + self.assertEqual(uv.message, "Broken internal hash anchor") + def test_internal_check_view_redirect(self): uv = Url(url="/admin/linkcheck") uv.check_url() @@ -74,14 +86,6 @@ def test_internal_check_invalid_url(http://github.com/self): self.assertEqual(uv.status, False) self.assertEqual(uv.message, 'Invalid URL') - def test_same_page_anchor(self): - # TODO Make this test - pass - #uv = Url(url="#anchor") - #uv.check_url() - #self.assertEqual(uv.status, None) - #self.assertEqual(uv.message, "") - class InternalMediaCheckTestCase(TestCase): def setUp(self): @@ -196,6 +200,18 @@ def test_external_check_timedout(self): self.assertEqual(uv.status, False) self.assertEqual(uv.message, 'Other Error: The read operation timed out') + def test_working_external_anchor(self): + uv = Url(url=f"{self.live_server_url}/http/anchor/#anchor") + uv.check_url() + self.assertEqual(uv.status, True) + self.assertEqual(uv.message, "Working external hash anchor") + + def test_broken_external_anchor(self): + uv = Url(url=f"{self.live_server_url}/http/anchor/#broken-anchor") + 
uv.check_url() + self.assertEqual(uv.status, False) + self.assertEqual(uv.message, "Broken external hash anchor") + class ModelTestCase(TestCase): diff --git a/linkcheck/tests/urls.py b/linkcheck/tests/urls.py index ab4b2e9..4244db7 100644 --- a/linkcheck/tests/urls.py +++ b/linkcheck/tests/urls.py @@ -17,5 +17,6 @@ path('http/redirect//', views.http_redirect), path('http/redirect_to_404/', views.http_redirect_to_404), path('http/brokenredirect/', RedirectView.as_view(url='/non-existent/')), + path('http/anchor/', views.http_response_with_anchor), path('timeout/', views.timeout), ] From 79762ea024111ea4bf68f467360ff5f2658ec367 Mon Sep 17 00:00:00 2001 From: Timo Ludwig Date: Sat, 19 Nov 2022 21:27:14 +0100 Subject: [PATCH 087/188] Add setting to ignore broken hash anchors --- CHANGELOG | 3 +++ README.rst | 11 ++++++++++- linkcheck/linkcheck_settings.py | 1 + linkcheck/models.py | 22 +++++++++++++++++++--- linkcheck/tests/test_linkcheck.py | 15 +++++++++++++++ 5 files changed, 48 insertions(+), 4 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 28b154c..b2e181b 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,8 @@ Unreleased +* Breaking change: Treat broken hash anchors as valid + unless `LINKCHECK_TOLERATE_BROKEN_ANCHOR` is manually + set to `False` (Timo Ludwig, #98) * Remove unused field `still_exists` from `Url` model * Delete outdated `Url` and `Link` objects when running `findlinks` command (Timo Ludwig, #101) diff --git a/README.rst b/README.rst index 2028152..1114d86 100644 --- a/README.rst +++ b/README.rst @@ -34,7 +34,7 @@ Basic usage #. Install app to somewhere on your Python path (e.g. ``pip install django-linkcheck``). - + #. Add ``'linkcheck'`` to your ``settings.INSTALLED_APPS``. #. 
Add a file named ``linklists.py`` to every app (see an example in ``examples/linklists.py``) that either: @@ -195,6 +195,15 @@ test.mysite.com If you instead set LINKCHECK_SITE_DOMAINS to be a list or tuple then you can explicitly list the domains that should be treated as internal. +LINKCHECK_TOLERATE_BROKEN_ANCHOR +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Default: ``True`` + +Whether links with broken hash anchors should be marked as valid. +Disable this if you want that links to anchors which are not contained in the link target's HTML source are marked as invalid. + + django-filebrowser integration ------------------------------ diff --git a/linkcheck/linkcheck_settings.py b/linkcheck/linkcheck_settings.py index 667590c..566be34 100644 --- a/linkcheck/linkcheck_settings.py +++ b/linkcheck/linkcheck_settings.py @@ -58,3 +58,4 @@ RESULTS_PER_PAGE = getattr(settings, 'LINKCHECK_RESULTS_PER_PAGE', 500) SITE_DOMAINS = getattr(settings, 'LINKCHECK_SITE_DOMAINS', []) DISABLE_LISTENERS = getattr(settings, 'LINKCHECK_DISABLE_LISTENERS', False) +TOLERATE_BROKEN_ANCHOR = getattr(settings, 'LINKCHECK_TOLERATE_BROKEN_ANCHOR', True) diff --git a/linkcheck/models.py b/linkcheck/models.py index a9cf8d9..efeed39 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -30,6 +30,7 @@ EXTERNAL_REGEX_STRING, EXTERNAL_RECHECK_INTERVAL, LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT, + TOLERATE_BROKEN_ANCHOR, ) logger = logging.getLogger('linkcheck') @@ -206,10 +207,17 @@ def _check_internal(self, tested_url): if hash in names: self.message = 'Working internal hash anchor' self.status = True + elif TOLERATE_BROKEN_ANCHOR: + self.message = 'Page OK, but broken internal hash anchor' + self.status = True else: self.message = 'Broken internal hash anchor' except UnicodeDecodeError: - self.message = 'Failed to parse HTML for anchor' + if TOLERATE_BROKEN_ANCHOR: + self.message = 'Page OK, but failed to parse HTML for anchor' + self.status = True + else: + self.message = 'Failed to parse HTML for 
anchor' elif tested_url.startswith('/'): @@ -232,12 +240,17 @@ def _check_internal(self, tested_url): names = parse_anchors(response.content) if anchor in names: self.message = 'Working internal hash anchor' - self.status = True + elif TOLERATE_BROKEN_ANCHOR: + self.message = 'Page OK, but broken internal hash anchor' else: self.message = 'Broken internal hash anchor' self.status = False except UnicodeDecodeError: - self.message = 'Failed to parse HTML for anchor' + if TOLERATE_BROKEN_ANCHOR: + self.message = 'Page OK, but failed to parse HTML for anchor' + else: + self.message = 'Failed to parse HTML for anchor' + self.status = False elif response.status_code == 302 or response.status_code == 301: with modify_settings(ALLOWED_HOSTS={'append': 'testserver'}): @@ -315,6 +328,9 @@ def _check_external(self, tested_url, external_recheck_interval): if anchor in names: self.message = 'Working external hash anchor' self.status = True + elif TOLERATE_BROKEN_ANCHOR: + self.message = 'Page OK, but broken external hash anchor' + self.status = True else: self.message = 'Broken external hash anchor' self.status = False diff --git a/linkcheck/tests/test_linkcheck.py b/linkcheck/tests/test_linkcheck.py index 16624be..b2380a2 100644 --- a/linkcheck/tests/test_linkcheck.py +++ b/linkcheck/tests/test_linkcheck.py @@ -1,5 +1,6 @@ from datetime import datetime, timedelta from io import StringIO +from unittest.mock import patch import os import re @@ -50,12 +51,19 @@ def test_working_internal_anchor(self): self.assertEqual(uv.status, True) self.assertEqual(uv.message, "Working internal hash anchor") + @patch("linkcheck.models.TOLERATE_BROKEN_ANCHOR", False) def test_broken_internal_anchor(self): uv = Url(url="/http/anchor/#broken-anchor") uv.check_url() self.assertEqual(uv.status, False) self.assertEqual(uv.message, "Broken internal hash anchor") + def test_broken_internal_anchor_tolerated(self): + uv = Url(url="/http/anchor/#broken-anchor") + uv.check_url() + 
self.assertEqual(uv.status, True) + self.assertEqual(uv.message, "Page OK, but broken internal hash anchor") + def test_internal_check_view_redirect(self): uv = Url(url="/admin/linkcheck") uv.check_url() @@ -206,12 +214,19 @@ def test_working_external_anchor(self): self.assertEqual(uv.status, True) self.assertEqual(uv.message, "Working external hash anchor") + @patch("linkcheck.models.TOLERATE_BROKEN_ANCHOR", False) def test_broken_external_anchor(self): uv = Url(url=f"{self.live_server_url}/http/anchor/#broken-anchor") uv.check_url() self.assertEqual(uv.status, False) self.assertEqual(uv.message, "Broken external hash anchor") + def test_broken_external_anchor_tolerated(self): + uv = Url(url=f"{self.live_server_url}/http/anchor/#broken-anchor") + uv.check_url() + self.assertEqual(uv.status, True) + self.assertEqual(uv.message, "Page OK, but broken external hash anchor") + class ModelTestCase(TestCase): From 402e9c8f5d25ac4f9011176d3714193e6fe92c77 Mon Sep 17 00:00:00 2001 From: Timo Ludwig Date: Sun, 20 Nov 2022 21:35:55 +0100 Subject: [PATCH 088/188] Fix help text of unignore command --- linkcheck/management/commands/unignore_links.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linkcheck/management/commands/unignore_links.py b/linkcheck/management/commands/unignore_links.py index 22b4193..f2c29ed 100644 --- a/linkcheck/management/commands/unignore_links.py +++ b/linkcheck/management/commands/unignore_links.py @@ -5,7 +5,7 @@ class Command(BaseCommand): - help = "Goes through all models registered with Linkcheck and records any links found" + help = "Updates the `ignore` status of all links to `False`" def execute(self, *args, **options): print("Unignoring all links") From d45fb6212c213db5877bd34fcda52f24cf600284 Mon Sep 17 00:00:00 2001 From: Timo Ludwig Date: Sun, 20 Nov 2022 12:13:02 +0100 Subject: [PATCH 089/188] Fix incorrect message when redirect has broken anchor --- CHANGELOG | 2 + linkcheck/models.py | 113 
++++++++++------------------- linkcheck/tests/sampleapp/views.py | 4 + linkcheck/tests/test_linkcheck.py | 58 ++++++++++++--- linkcheck/tests/urls.py | 1 + 5 files changed, 91 insertions(+), 87 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index b2e181b..b392dbe 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,7 @@ Unreleased +* Fix incorrect message when redirect has broken anchor + (Timo Ludwig, #128) * Breaking change: Treat broken hash anchors as valid unless `LINKCHECK_TOLERATE_BROKEN_ANCHOR` is manually set to `False` (Timo Ludwig, #98) diff --git a/linkcheck/models.py b/linkcheck/models.py index efeed39..a4bf7ca 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -202,23 +202,7 @@ def _check_internal(self, tested_url): html_content = '' for field in instance._linklist.html_fields: html_content += getattr(instance, field, '') - try: - names = parse_anchors(html_content) - if hash in names: - self.message = 'Working internal hash anchor' - self.status = True - elif TOLERATE_BROKEN_ANCHOR: - self.message = 'Page OK, but broken internal hash anchor' - self.status = True - else: - self.message = 'Broken internal hash anchor' - except UnicodeDecodeError: - if TOLERATE_BROKEN_ANCHOR: - self.message = 'Page OK, but failed to parse HTML for anchor' - self.status = True - else: - self.message = 'Failed to parse HTML for anchor' - + self._check_anchor(hash, html_content) elif tested_url.startswith('/'): old_prepend_setting = settings.PREPEND_WWW @@ -230,41 +214,24 @@ def _check_internal(self, tested_url): if response.status_code == 200: self.message = 'Working internal link' self.status = True - # see if the internal link points an anchor - if tested_url[-1] == '#': # special case, point to # - self.message = 'Working internal hash anchor' - elif tested_url.count('#'): - anchor = tested_url.split('#')[1] - from linkcheck import parse_anchors - try: - names = parse_anchors(response.content) - if anchor in names: - self.message = 'Working internal hash 
anchor' - elif TOLERATE_BROKEN_ANCHOR: - self.message = 'Page OK, but broken internal hash anchor' - else: - self.message = 'Broken internal hash anchor' - self.status = False - except UnicodeDecodeError: - if TOLERATE_BROKEN_ANCHOR: - self.message = 'Page OK, but failed to parse HTML for anchor' - else: - self.message = 'Failed to parse HTML for anchor' - self.status = False - elif response.status_code == 302 or response.status_code == 301: + redirect_type = "permanent" if response.status_code == 301 else "temporary" with modify_settings(ALLOWED_HOSTS={'append': 'testserver'}): - redir_response = c.get(tested_url, follow=True) - if redir_response.status_code == 200: - redir_state = 'Working redirect' + response = c.get(tested_url, follow=True) + if response.status_code == 200: + self.message = f'Working {redirect_type} redirect' self.status = True else: - redir_state = 'Broken redirect' - self.status = False - self.message = 'This link redirects: code %d (%s)' % ( - response.status_code, redir_state) + self.message = f'Broken {redirect_type} redirect' else: self.message = 'Broken internal link' + # see if the internal link points an anchor + if tested_url[-1] == '#': + # special case, point to # + self.message += ', working internal hash anchor' + elif tested_url.count('#'): + anchor = tested_url.split('#')[1] + self._check_anchor(anchor, response.content) settings.PREPEND_WWW = old_prepend_setting else: self.message = 'Invalid URL' @@ -309,48 +276,42 @@ def _check_external(self, tested_url, external_recheck_interval): response = requests.get(url, **request_params) except ReadTimeout: self.message = 'Other Error: The read operation timed out' - self.status = False except ConnectionError as e: self.message = format_connection_error(e) - self.status = False except Exception as e: self.message = 'Other Error: %s' % e - self.status = False else: - self.message = ' '.join([str(response.status_code), response.reason]) - self.status = 200 <= response.status_code < 400 + 
self.message = f"{response.status_code} {response.reason}" + + if response.ok and response.status_code not in REDIRECT_STATI: + self.status = True + # If initial response was a redirect, return the initial return code + if response.history: + self.message = f"{response.history[0].status_code} {response.history[0].reason}" + self.redirect_to = response.url if tested_url.count('#'): anchor = tested_url.split('#')[1] - from linkcheck import parse_anchors - try: - names = parse_anchors(response.text) - if anchor in names: - self.message = 'Working external hash anchor' - self.status = True - elif TOLERATE_BROKEN_ANCHOR: - self.message = 'Page OK, but broken external hash anchor' - self.status = True - else: - self.message = 'Broken external hash anchor' - self.status = False - - except: - # The external web page is mal-formatted #or maybe other parse errors like encoding - # I reckon a broken anchor on an otherwise good URL should count as a pass - self.message = "Page OK but anchor can't be checked" - self.status = True - - if response.status_code in REDIRECT_STATI: - # This means it could not follow the redirection - self.status = False - elif response.status_code < 300 and response.history: - self.message = ' '.join([str(response.history[0].status_code), response.history[0].reason]) - self.redirect_to = response.url + self._check_anchor(anchor, response.text, internal=False) self.last_checked = now() self.save() + def _check_anchor(self, anchor, html, internal=True): + from linkcheck import parse_anchors + scope = "internal" if internal else "external" + try: + names = parse_anchors(html) + if anchor in names: + self.message += f', working {scope} hash anchor' + else: + self.message += f', broken {scope} hash anchor' + if not TOLERATE_BROKEN_ANCHOR: + self.status = False + except UnicodeDecodeError: + self.message += ', failed to parse HTML for anchor' + if not TOLERATE_BROKEN_ANCHOR: + self.status = False class Link(models.Model): """ diff --git 
a/linkcheck/tests/sampleapp/views.py b/linkcheck/tests/sampleapp/views.py index 7376e34..3eba199 100644 --- a/linkcheck/tests/sampleapp/views.py +++ b/linkcheck/tests/sampleapp/views.py @@ -26,3 +26,7 @@ def timeout(request): def http_response_with_anchor(request): return HttpResponse("

Anchor

") + + +def http_redirect_to_anchor(request): + return HttpResponseRedirect("/http/anchor/") diff --git a/linkcheck/tests/test_linkcheck.py b/linkcheck/tests/test_linkcheck.py index b2380a2..74b0968 100644 --- a/linkcheck/tests/test_linkcheck.py +++ b/linkcheck/tests/test_linkcheck.py @@ -49,32 +49,49 @@ def test_working_internal_anchor(self): uv = Url(url="/http/anchor/#anchor") uv.check_url() self.assertEqual(uv.status, True) - self.assertEqual(uv.message, "Working internal hash anchor") + self.assertEqual(uv.message, "Working internal link, working internal hash anchor") @patch("linkcheck.models.TOLERATE_BROKEN_ANCHOR", False) def test_broken_internal_anchor(self): uv = Url(url="/http/anchor/#broken-anchor") uv.check_url() self.assertEqual(uv.status, False) - self.assertEqual(uv.message, "Broken internal hash anchor") + self.assertEqual(uv.message, "Working internal link, broken internal hash anchor") def test_broken_internal_anchor_tolerated(self): uv = Url(url="/http/anchor/#broken-anchor") uv.check_url() self.assertEqual(uv.status, True) - self.assertEqual(uv.message, "Page OK, but broken internal hash anchor") + self.assertEqual(uv.message, "Working internal link, broken internal hash anchor") + + def test_redirect_working_internal_anchor(self): + uv = Url(url="/http/redirect_to_anchor/#anchor") + uv.check_url() + self.assertEqual(uv.status, True) + self.assertEqual(uv.message, "Working temporary redirect, working internal hash anchor") + + @patch("linkcheck.models.TOLERATE_BROKEN_ANCHOR", False) + def test_redirect_broken_internal_anchor(self): + uv = Url(url="/http/redirect_to_anchor/#broken-anchor") + uv.check_url() + self.assertEqual(uv.status, False) + self.assertEqual(uv.message, "Working temporary redirect, broken internal hash anchor") + + def test_redirect_broken_internal_anchor_tolerated(self): + uv = Url(url="/http/redirect_to_anchor/#broken-anchor") + uv.check_url() + self.assertEqual(uv.status, True) + self.assertEqual(uv.message, "Working 
temporary redirect, broken internal hash anchor") def test_internal_check_view_redirect(self): uv = Url(url="/admin/linkcheck") uv.check_url() self.assertEqual(uv.status, True) - self.assertIn(uv.message, - ['This link redirects: code %s (Working redirect)' % status for status in [301, 302]] - ) + self.assertEqual(uv.message, "Working temporary redirect") uv = Url(url="/http/brokenredirect/") uv.check_url() self.assertEqual(uv.status, False) - self.assertEqual(uv.message, 'This link redirects: code 302 (Broken redirect)') + self.assertEqual(uv.message, 'Broken temporary redirect') def test_internal_check_found(self): uv = Url(url="/public/") @@ -161,7 +178,7 @@ def test_external_check_301(self): uv = Url(url="%s/http/301/" % self.live_server_url) uv.check_url() self.assertEqual(uv.status, False) - self.assertEqual(uv.message.lower(), '301 moved permanently') + self.assertEqual(uv.message, '301 Moved Permanently') self.assertEqual(uv.redirect_to, '') def test_external_check_301_followed(self): @@ -212,20 +229,39 @@ def test_working_external_anchor(self): uv = Url(url=f"{self.live_server_url}/http/anchor/#anchor") uv.check_url() self.assertEqual(uv.status, True) - self.assertEqual(uv.message, "Working external hash anchor") + self.assertEqual(uv.message, "200 OK, working external hash anchor") @patch("linkcheck.models.TOLERATE_BROKEN_ANCHOR", False) def test_broken_external_anchor(self): uv = Url(url=f"{self.live_server_url}/http/anchor/#broken-anchor") uv.check_url() self.assertEqual(uv.status, False) - self.assertEqual(uv.message, "Broken external hash anchor") + self.assertEqual(uv.message, "200 OK, broken external hash anchor") def test_broken_external_anchor_tolerated(self): uv = Url(url=f"{self.live_server_url}/http/anchor/#broken-anchor") uv.check_url() self.assertEqual(uv.status, True) - self.assertEqual(uv.message, "Page OK, but broken external hash anchor") + self.assertEqual(uv.message, "200 OK, broken external hash anchor") + + def 
test_redirect_working_external_anchor(self): + uv = Url(url=f"{self.live_server_url}/http/redirect_to_anchor/#anchor") + uv.check_url() + self.assertEqual(uv.status, True) + self.assertEqual(uv.message, "302 Found, working external hash anchor") + + @patch("linkcheck.models.TOLERATE_BROKEN_ANCHOR", False) + def test_redirect_broken_external_anchor(self): + uv = Url(url=f"{self.live_server_url}/http/redirect_to_anchor/#broken-anchor") + uv.check_url() + self.assertEqual(uv.status, False) + self.assertEqual(uv.message, "302 Found, broken external hash anchor") + + def test_redirect_broken_external_anchor_tolerated(self): + uv = Url(url=f"{self.live_server_url}/http/redirect_to_anchor/#broken-anchor") + uv.check_url() + self.assertEqual(uv.status, True) + self.assertEqual(uv.message, "302 Found, broken external hash anchor") class ModelTestCase(TestCase): diff --git a/linkcheck/tests/urls.py b/linkcheck/tests/urls.py index 4244db7..bb0eeb1 100644 --- a/linkcheck/tests/urls.py +++ b/linkcheck/tests/urls.py @@ -16,6 +16,7 @@ path('http/getonly//', views.http_response_get_only), path('http/redirect//', views.http_redirect), path('http/redirect_to_404/', views.http_redirect_to_404), + path('http/redirect_to_anchor/', views.http_redirect_to_anchor), path('http/brokenredirect/', RedirectView.as_view(url='/non-existent/')), path('http/anchor/', views.http_response_with_anchor), path('timeout/', views.timeout), From a11ff19ee9abb8f980bd73c768f67111d37733b8 Mon Sep 17 00:00:00 2001 From: Timo Ludwig Date: Sun, 20 Nov 2022 13:25:52 +0100 Subject: [PATCH 090/188] Add debug logging --- linkcheck/models.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/linkcheck/models.py b/linkcheck/models.py index a4bf7ca..0853d14 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -33,7 +33,7 @@ TOLERATE_BROKEN_ANCHOR, ) -logger = logging.getLogger('linkcheck') +logger = logging.getLogger(__name__) EXTERNAL_REGEX = re.compile(EXTERNAL_REGEX_STRING) 
@@ -160,6 +160,7 @@ def check_url(self, check_internal=True, check_external=True, external_recheck_i return self.status def _check_internal(self, tested_url): + logger.debug('checking internal link: %s', tested_url) from linkcheck.utils import LinkCheckHandler @@ -244,10 +245,14 @@ def _check_internal(self, tested_url): self.save() def _check_external(self, tested_url, external_recheck_interval): - logger.info('checking external link: %s' % tested_url) + logger.info('checking external link: %s', tested_url) external_recheck_datetime = now() - timedelta(minutes=external_recheck_interval) if self.last_checked and (self.last_checked > external_recheck_datetime): + logger.debug( + 'URL was last checked in the last %s minutes, so not checking it again', + external_recheck_interval + ) return self.status # Remove URL fragment identifiers @@ -272,6 +277,7 @@ def _check_external(self, tested_url, external_recheck_interval): response = requests.head(url, **request_params) if response.status_code >= 400: + logger.debug('HEAD is not allowed, retry with GET') # If HEAD is not allowed, let's try with GET response = requests.get(url, **request_params) except ReadTimeout: @@ -282,11 +288,13 @@ def _check_external(self, tested_url, external_recheck_interval): self.message = 'Other Error: %s' % e else: self.message = f"{response.status_code} {response.reason}" + logger.debug('Response message: %s', self.message) if response.ok and response.status_code not in REDIRECT_STATI: self.status = True # If initial response was a redirect, return the initial return code if response.history: + logger.debug('Redirect history: %r', response.history) self.message = f"{response.history[0].status_code} {response.history[0].reason}" self.redirect_to = response.url @@ -308,7 +316,8 @@ def _check_anchor(self, anchor, html, internal=True): self.message += f', broken {scope} hash anchor' if not TOLERATE_BROKEN_ANCHOR: self.status = False - except UnicodeDecodeError: + except UnicodeDecodeError as e: + 
logger.debug('UnicodeDecodeError while parsing anchors: %s', e) self.message += ', failed to parse HTML for anchor' if not TOLERATE_BROKEN_ANCHOR: self.status = False @@ -353,6 +362,7 @@ def link_post_delete(sender, instance, **kwargs): url = instance.url count = url.links.all().count() if count == 0: + logger.debug('This was the last link for %r, so deleting it', url) url.delete() except Url.DoesNotExist: pass From 2469c521133be8addb979c25b3f3990cf5a62adf Mon Sep 17 00:00:00 2001 From: Timo Ludwig Date: Sun, 20 Nov 2022 13:32:36 +0100 Subject: [PATCH 091/188] Remove unused imports --- linkcheck/__init__.py | 2 -- linkcheck/cron.py | 2 +- linkcheck/tests/test_linkcheck.py | 2 -- linkcheck/views.py | 2 -- 4 files changed, 1 insertion(+), 7 deletions(-) diff --git a/linkcheck/__init__.py b/linkcheck/__init__.py index b912a91..4ee803b 100644 --- a/linkcheck/__init__.py +++ b/linkcheck/__init__.py @@ -1,8 +1,6 @@ import threading from html.parser import HTMLParser -import django - # A global lock, showing whether linkcheck is busy update_lock = threading.Lock() diff --git a/linkcheck/cron.py b/linkcheck/cron.py index 7d4a333..36fe673 100644 --- a/linkcheck/cron.py +++ b/linkcheck/cron.py @@ -9,7 +9,7 @@ from django_cron import cronScheduler from django_cron import Job -from django_cron import HOUR, DAY, WEEK, MONTH +from django_cron import WEEK from linkcheck.utils import check_links from linkcheck.utils import find_all_links diff --git a/linkcheck/tests/test_linkcheck.py b/linkcheck/tests/test_linkcheck.py index 74b0968..6aac6ca 100644 --- a/linkcheck/tests/test_linkcheck.py +++ b/linkcheck/tests/test_linkcheck.py @@ -2,9 +2,7 @@ from io import StringIO from unittest.mock import patch import os -import re -import django from django.apps import apps from django.conf import settings from django.contrib.auth.models import User diff --git a/linkcheck/views.py b/linkcheck/views.py index 5408988..85883cf 100644 --- a/linkcheck/views.py +++ b/linkcheck/views.py @@ -2,9 +2,7 
@@ from itertools import groupby from operator import itemgetter -import django from django import forms -from django.conf import settings from django.contrib.admin.views.decorators import staff_member_required from django.contrib.contenttypes.models import ContentType from django.core.exceptions import ObjectDoesNotExist From 8a97901d6490fcbbe111040a52d176be2daf0a80 Mon Sep 17 00:00:00 2001 From: Timo Ludwig Date: Sun, 20 Nov 2022 13:39:13 +0100 Subject: [PATCH 092/188] Small code cleanups --- linkcheck/models.py | 27 +++++++++++++-------------- linkcheck/utils.py | 2 +- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/linkcheck/models.py b/linkcheck/models.py index 0853d14..de2ccf9 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -75,7 +75,7 @@ def type(self): return 'mailto' if self.url.startswith('tel'): return 'phone' - elif str(self.url)=='': + elif self.url == '': return 'empty' elif self.url.startswith('#'): return 'anchor' @@ -138,7 +138,7 @@ def check_url(self, check_internal=True, check_external=True, external_recheck_i elif root_domain.startswith('test.'): root_domain = root_domain[5:] internal_exceptions = [ - 'http://'+root_domain, 'http://www./'+root_domain, 'http://test./'+root_domain, + 'http://' + root_domain, 'http://www./' + root_domain, 'http://test./' + root_domain, 'https://' + root_domain, 'https://www./' + root_domain, 'https://test./' + root_domain, ] @@ -164,7 +164,7 @@ def _check_internal(self, tested_url): from linkcheck.utils import LinkCheckHandler - if not(tested_url): + if not tested_url: self.message = 'Empty link' elif tested_url.startswith('mailto:'): @@ -199,7 +199,7 @@ def _check_internal(self, tested_url): self.message = 'Working internal hash anchor' self.status = True else: - hash = hash[1:] #'#something' => 'something' + hash = hash[1:] # '#something' => 'something' html_content = '' for field in instance._linklist.html_fields: html_content += 
getattr(instance, field, '') @@ -275,11 +275,10 @@ def _check_external(self, tested_url, external_recheck_interval): else: # Might as well just do a HEAD request response = requests.head(url, **request_params) - - if response.status_code >= 400: - logger.debug('HEAD is not allowed, retry with GET') # If HEAD is not allowed, let's try with GET - response = requests.get(url, **request_params) + if response.status_code >= 400: + logger.debug('HEAD is not allowed, retry with GET') + response = requests.get(url, **request_params) except ReadTimeout: self.message = 'Other Error: The read operation timed out' except ConnectionError as e: @@ -357,8 +356,8 @@ def __repr__(self): def link_post_delete(sender, instance, **kwargs): try: - #url.delete() => link.delete() => link_post_delete - #in this case link.url is already deleted from db, so we need a try here. + # url.delete() => link.delete() => link_post_delete + # in this case link.url is already deleted from db, so we need a try here. url = instance.url count = url.links.all().count() if count == 0: @@ -373,7 +372,7 @@ def format_connection_error(e): Helper function to provide better readable output of connection errors """ # If the exception message is wrapped in an "HTTPSConnectionPool", only give the underlying cause - reason = re.search("\(Caused by ([a-zA-Z]+\(.+\))\)", str(e)) + reason = re.search(r"\(Caused by ([a-zA-Z]+\(.+\))\)", str(e)) if not reason: return f"Connection Error: {e}" reason = reason[1] @@ -391,7 +390,7 @@ def format_new_connection_error(reason): Helper function to provide better readable output of new connection errors thrown by urllib3 """ connection_reason = re.search( - "NewConnectionError\(': (.+)'\)", + r"NewConnectionError\(': (.+)'\)", reason, ) if connection_reason: @@ -403,10 +402,10 @@ def format_ssl_error(reason): """ Helper function to provide better readable output of SSL errors thrown by urllib3 """ - ssl_reason = re.search("SSLError\([a-zA-Z]+\((.+)\)\)", reason) + ssl_reason = 
re.search(r"SSLError\([a-zA-Z]+\((.+)\)\)", reason) if ssl_reason: # If the reason lies withing the ssl c library, hide additional debug output - ssl_c_reason = re.search("1, '\[SSL: [A-Z\d_]+\] (.+) \(_ssl\.c:\d+\)'", ssl_reason[1]) + ssl_c_reason = re.search(r"1, '\[SSL: [A-Z\d_]+\] (.+) \(_ssl\.c:\d+\)'", ssl_reason[1]) if ssl_c_reason: return f"SSL Error: {ssl_c_reason[1]}" return f"SSL Error: {ssl_reason[1]}" diff --git a/linkcheck/utils.py b/linkcheck/utils.py index 26ea687..bf833f6 100644 --- a/linkcheck/utils.py +++ b/linkcheck/utils.py @@ -103,7 +103,7 @@ def check_links(external_recheck_interval=10080, limit=-1, check_internal=True, for u in urls: status = u.check_url(http://github.com/check_internal=check_internal,%20check_external=check_external) check_count += 1 if status is not None else 0 - if limit > -1 and check_count >= limit: + if -1 < limit <= check_count: break return check_count From 3d81e0ab41b0be1cb019d72cfdc64071a904c12a Mon Sep 17 00:00:00 2001 From: Timo Ludwig Date: Sun, 20 Nov 2022 13:48:45 +0100 Subject: [PATCH 093/188] Use f-strings instead of %-formatters --- linkcheck/__init__.py | 2 +- linkcheck/apps.py | 6 +++--- linkcheck/filebrowser.py | 8 ++++---- linkcheck/models.py | 4 ++-- linkcheck/tests/sampleapp/models.py | 2 +- linkcheck/tests/test_linkcheck.py | 28 ++++++++++++++-------------- linkcheck/utils.py | 2 +- linkcheck/views.py | 2 +- 8 files changed, 27 insertions(+), 27 deletions(-) diff --git a/linkcheck/__init__.py b/linkcheck/__init__.py index 4ee803b..ba85208 100644 --- a/linkcheck/__init__.py +++ b/linkcheck/__init__.py @@ -29,7 +29,7 @@ def handle_starttag(self, tag, attrs): elif tag == 'img' and self.in_a: src = [v for k, v in attrs if k == 'src'] if src: - self.text += ' [image:%s] ' % src[0] + self.text += f' [image:{src[0]}] ' def handle_endtag(self, tag): if tag == 'a' and self.in_a: diff --git a/linkcheck/apps.py b/linkcheck/apps.py index 1d2532b..5de8d7c 100644 --- a/linkcheck/apps.py +++ 
b/linkcheck/apps.py @@ -20,19 +20,19 @@ def ready(self): def build_linklists(self): """Autodiscovery of linkLists""" for app in apps.get_app_configs(): - module_name = "%s.linklists" % app.name + module_name = f"{app.name}.linklists" if not importlib.util.find_spec(module_name): continue the_module = importlib.import_module(module_name) try: for k in the_module.linklists.keys(): if k in self.all_linklists.keys(): - raise AlreadyRegistered('The key %s is already registered in all_linklists' % k) + raise AlreadyRegistered(f'The key {k} is already registered in all_linklists') for l in the_module.linklists.values(): for l2 in self.all_linklists.values(): if l.model == l2.model: - raise AlreadyRegistered('The LinkList %s is already registered in all_linklists' % l) + raise AlreadyRegistered(f'The LinkList {l} is already registered in all_linklists') self.all_linklists.update(the_module.linklists) except AttributeError: pass diff --git a/linkcheck/filebrowser.py b/linkcheck/filebrowser.py index 1c5303a..7f74a1a 100644 --- a/linkcheck/filebrowser.py +++ b/linkcheck/filebrowser.py @@ -30,7 +30,7 @@ def handle_upload(sender, path=None, **kwargs): count = url_qs.count() if count: url_qs.update(status=True, message='Working document link') - msg = "Please note. Uploading %s has corrected %s broken link%s. See the Link Manager for more details" % (url, count, count > 1 and 's' or '') + msg = f"Please note. Uploading {url} has corrected {count} broken link{count > 1 and 's' or ''}. See the Link Manager for more details" messages.info(sender, msg) @@ -52,7 +52,7 @@ def isdir(filename): old_count = old_url_qs.count() if old_count: old_url_qs.update(status=False, message='Missing Document') - msg = "Warning. Renaming %s has caused %s link%s to break. Please use the Link Manager to fix them" % (old_url, old_count, old_count > 1 and 's' or '') + msg = f"Warning. Renaming {old_url} has caused {old_count} link{old_count > 1 and 's' or ''} to break. 
Please use the Link Manager to fix them" messages.info(sender, msg) # The new directory may fix some invalid links, so we also check for that @@ -68,7 +68,7 @@ def isdir(filename): if new_count: new_url_qs.update(status=True, message='Working document link') if new_count: - msg = "Please note. Renaming %s has corrected %s broken link%s. See the Link Manager for more details" % (new_url, new_count, new_count > 1 and 's' or '') + msg = f"Please note. Renaming {new_url} has corrected {new_count} broken link{new_count > 1 and 's' or ''}. See the Link Manager for more details" messages.info(sender, msg) @@ -79,7 +79,7 @@ def handle_delete(sender, path=None, **kwargs): count = url_qs.count() if count: url_qs.update(status=False, message='Missing Document') - msg = "Warning. Deleting %s has caused %s link%s to break. Please use the Link Manager to fix them" % (url, count, count > 1 and 's' or '') + msg = f"Warning. Deleting {url} has caused {count} link{count > 1 and 's' or ''} to break. Please use the Link Manager to fix them" messages.info(sender, msg) diff --git a/linkcheck/models.py b/linkcheck/models.py index de2ccf9..8dd44b5 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -265,7 +265,7 @@ def _check_external(self, tested_url, external_recheck_interval): request_params = { 'allow_redirects': True, - 'headers': {'User-Agent' : "http://%s Linkchecker" % settings.SITE_DOMAIN}, + 'headers': {'User-Agent' : f"http://{settings.SITE_DOMAIN} Linkchecker"}, 'timeout': LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT, } try: @@ -284,7 +284,7 @@ def _check_external(self, tested_url, external_recheck_interval): except ConnectionError as e: self.message = format_connection_error(e) except Exception as e: - self.message = 'Other Error: %s' % e + self.message = f'Other Error: {e}' else: self.message = f"{response.status_code} {response.reason}" logger.debug('Response message: %s', self.message) diff --git a/linkcheck/tests/sampleapp/models.py b/linkcheck/tests/sampleapp/models.py 
index af87065..e53e253 100644 --- a/linkcheck/tests/sampleapp/models.py +++ b/linkcheck/tests/sampleapp/models.py @@ -6,7 +6,7 @@ class Book(models.Model): description = models.TextField() def get_absolute_url(http://github.com/self): - return "/book/%s/" % self.id + return f"/book/{self.id}/" class Author(models.Model): diff --git a/linkcheck/tests/test_linkcheck.py b/linkcheck/tests/test_linkcheck.py index 6aac6ca..48a3077 100644 --- a/linkcheck/tests/test_linkcheck.py +++ b/linkcheck/tests/test_linkcheck.py @@ -148,77 +148,77 @@ def test_internal_check_media_utf8(self): @override_settings(SITE_DOMAIN='example.com') class ExternalCheckTestCase(LiveServerTestCase): def test_external_check_200(self): - uv = Url(url="%s/http/200/" % self.live_server_url) + uv = Url(url=f"{self.live_server_url}/http/200/") uv.check_url() self.assertEqual(uv.status, True) self.assertEqual(uv.message, '200 OK') self.assertEqual(uv.redirect_to, '') def test_external_check_200_missing_cert(self): - uv = Url(url="%s/http/200/" % self.live_server_url.replace("http://", "https://")) + uv = Url(url=f"{self.live_server_url.replace('http://', 'https://')}/http/200/") uv.check_url() self.assertEqual(uv.status, False) self.assertEqual(uv.message, 'SSL Error: wrong version number') self.assertEqual(uv.redirect_to, '') def test_external_check_200_utf8(self): - uv = Url(url="%s/http/200/r%%C3%%BCckmeldung/" % self.live_server_url) + uv = Url(url=f"{self.live_server_url}/http/200/r%C3%BCckmeldung/") uv.check_url() self.assertEqual(uv.status, True) self.assertEqual(uv.message, '200 OK') # Also when the url is not encoded - uv = Url(url="%s/http/200/rückmeldung/" % self.live_server_url) + uv = Url(url=f"{self.live_server_url}/http/200/rückmeldung/") uv.check_url() self.assertEqual(uv.status, True) self.assertEqual(uv.message, '200 OK') def test_external_check_301(self): - uv = Url(url="%s/http/301/" % self.live_server_url) + uv = Url(url=f"{self.live_server_url}/http/301/") uv.check_url() 
self.assertEqual(uv.status, False) self.assertEqual(uv.message, '301 Moved Permanently') self.assertEqual(uv.redirect_to, '') def test_external_check_301_followed(self): - uv = Url(url="%s/http/redirect/301/" % self.live_server_url) + uv = Url(url=f"{self.live_server_url}/http/redirect/301/") uv.check_url() self.assertEqual(uv.status, True) self.assertEqual(uv.message, '301 Moved Permanently') - self.assertEqual(uv.redirect_to, '%s/http/200/' % self.live_server_url) + self.assertEqual(uv.redirect_to, f'{self.live_server_url}/http/200/') def test_external_check_302_followed(self): - uv = Url(url="%s/http/redirect/302/" % self.live_server_url) + uv = Url(url=f"{self.live_server_url}/http/redirect/302/") uv.check_url() self.assertEqual(uv.status, True) self.assertEqual(uv.message, '302 Found') - self.assertEqual(uv.redirect_to, '%s/http/200/' % self.live_server_url) + self.assertEqual(uv.redirect_to, f'{self.live_server_url}/http/200/') def test_external_check_404(self): - uv = Url(url="%s/whatever/" % self.live_server_url) + uv = Url(url=f"{self.live_server_url}/whatever/") uv.check_url() self.assertEqual(uv.status, False) self.assertEqual(uv.message.lower(), '404 not found') def test_external_check_redirect_final_404(self): - uv = Url(url="%s/http/redirect_to_404/" % self.live_server_url) + uv = Url(url=f"{self.live_server_url}/http/redirect_to_404/") uv.check_url() self.assertEqual(uv.status, False) self.assertEqual(uv.message.lower(), '404 not found') def test_external_check_get_only(self): # An URL that allows GET but not HEAD, linkcheck should fallback on GET. 
- uv = Url(url="%s/http/getonly/405/" % self.live_server_url) + uv = Url(url=f"{self.live_server_url}/http/getonly/405/") uv.check_url() self.assertEqual(uv.status, True) self.assertEqual(uv.message, '200 OK') # Same test with other 40x error - uv = Url(url="%s/http/getonly/400/" % self.live_server_url) + uv = Url(url=f"{self.live_server_url}/http/getonly/400/") uv.check_url() self.assertEqual(uv.status, True) self.assertEqual(uv.message, '200 OK') def test_external_check_timedout(self): - uv = Url(url="%s/timeout/" % self.live_server_url) + uv = Url(url=f"{self.live_server_url}/timeout/") uv.check_url() self.assertEqual(uv.status, False) self.assertEqual(uv.message, 'Other Error: The read operation timed out') diff --git a/linkcheck/utils.py b/linkcheck/utils.py index bf833f6..6c30d79 100644 --- a/linkcheck/utils.py +++ b/linkcheck/utils.py @@ -310,7 +310,7 @@ def get_coverage_data(): break if should_append: all_model_list.append({ - 'name': '%s.%s' % (model._meta.app_label, model._meta.object_name), + 'name': f'{model._meta.app_label}.{model._meta.object_name}', 'is_covered': is_model_covered(model), 'suggested_config': get_suggested_linklist_config(model), }) diff --git a/linkcheck/views.py b/linkcheck/views.py index 85883cf..5406ae6 100644 --- a/linkcheck/views.py +++ b/linkcheck/views.py @@ -124,7 +124,7 @@ def report(request): admin_url = object.get_admin_url() # TODO allow method name to be configurable except AttributeError: try: - admin_url = reverse('admin:%s_%s_change' % (content_type.app_label, content_type.model), args=[ok]) + admin_url = reverse(f'admin:{content_type.app_label}_{content_type.model}_change', args=[ok]) except NoReverseMatch: admin_url = None From 77e3b7f6cfc15f9b70399821126e6364033d3dc7 Mon Sep 17 00:00:00 2001 From: Timo Ludwig Date: Sun, 20 Nov 2022 22:03:57 +0100 Subject: [PATCH 094/188] Update AUTHORS --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 3d65753..1432766 100644 --- a/AUTHORS +++ 
b/AUTHORS @@ -3,3 +3,4 @@ Fruits Chen Tim Graves Jannis Leidel Claude Paroz +Timo Ludwig From 47bdb5659e1963259ad8efc460aad09ff5b34ef9 Mon Sep 17 00:00:00 2001 From: Timo Ludwig Date: Sun, 20 Nov 2022 22:11:50 +0100 Subject: [PATCH 095/188] Update supported versions in README --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 1114d86..e42c104 100644 --- a/README.rst +++ b/README.rst @@ -27,7 +27,7 @@ automatically when objects are saved. This is handled by signals. Minimal requirements -------------------- -django-linkchecks requires Python 3 and Django 2.2. +django-linkcheck requires Python 3.7 and Django 3.2. Basic usage ----------- From 84508a3f1cabf6aab7bf6197cbc9c2fc92343b73 Mon Sep 17 00:00:00 2001 From: Timo Ludwig Date: Sun, 20 Nov 2022 22:39:20 +0100 Subject: [PATCH 096/188] Update contribution guide --- .gitignore | 4 +--- README.rst | 26 ++++++++++++++++++++++---- setup.py | 2 +- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index c423c52..61d9422 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ __pycache__/ # Distribution / packaging .Python +.venv/ env/ build/ develop-eggs/ @@ -55,6 +56,3 @@ docs/_build/ # PyBuilder target/ - -# IDE/Editor stuff -.idea/* \ No newline at end of file diff --git a/README.rst b/README.rst index e42c104..31f821b 100644 --- a/README.rst +++ b/README.rst @@ -210,24 +210,42 @@ django-filebrowser integration If django-filebrowser is present on your path then linkcheck will listen to the post-upload, delete and rename signals and update itself according +Contributing +------------ + +You can install all requirements of the development setup with: + +.. code-block:: bash + + $ python3 -m venv .venv + $ source .venv/bin/activate + $ pip install -e . + Running tests -------------- +~~~~~~~~~~~~~ + +Tests can be run standalone by using the ``runtests.py`` script in linkcheck root: + +.. 
code-block:: bash -Tests can be run standalone by using the runtests.py script in linkcheck root: $ python runtests.py -If you want to run linkcheck tests in the context of your project, you should include 'linkcheck.tests.sampleapp' in your INSTALLED_APPS setting. +If you want to run linkcheck tests in the context of your project, you should include ``'linkcheck.tests.sampleapp'`` in your ``INSTALLED_APPS`` setting. Linkcheck gives you two context managers to enable or disable listeners in your own tests. For example: +.. code-block:: python3 + def test_something_without_listeners(self): with listeners.disable_listeners(): # Create/update here without linkcheck intervening. -In the case you defined the LINKCHECK_DISABLE_LISTENERS setting, you can +In the case you defined the ``LINKCHECK_DISABLE_LISTENERS`` setting, you can temporarily enable it by: +.. code-block:: python3 + def test_something_with_listeners(self): with listeners.enable_listeners(): # Create/update here and see linkcheck activated. 
diff --git a/setup.py b/setup.py index db8aadf..96e074f 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ def read(fname): license='BSD', url='https://github.com/DjangoAdminHackers/django-linkcheck', include_package_data=True, - install_requires=['requests'], + install_requires=['django', 'requests'], classifiers=[ 'Development Status :: 5 - Production/Stable', 'Environment :: Web Environment', From 174afb279c532a7b9ef7ca90252f1fbf1d5b6cec Mon Sep 17 00:00:00 2001 From: Timo Ludwig Date: Mon, 21 Nov 2022 16:36:08 +0100 Subject: [PATCH 097/188] Add flake8 - Add flake8 to dev requirements - Add flake8 GitHub action - Add flake8 pre-commit hook --- .github/workflows/linting.yml | 18 ++++++++++++++++++ .pre-commit-config.yaml | 6 ++++++ README.rst | 8 +++++++- setup.py | 3 +++ 4 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/linting.yml create mode 100644 .pre-commit-config.yaml diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml new file mode 100644 index 0000000..42fd34f --- /dev/null +++ b/.github/workflows/linting.yml @@ -0,0 +1,18 @@ +name: Linting + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + flake8: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + - name: Install dependencies + run: pip install flake8 + - name: Run flake8 + run: flake8 --max-line-length=120 linkcheck diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..f6b4d33 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,6 @@ +repos: + - repo: https://github.com/pycqa/flake8 + rev: 5.0.4 + hooks: + - id: flake8 + args: [--max-line-length=120] diff --git a/README.rst b/README.rst index 31f821b..62202c1 100644 --- a/README.rst +++ b/README.rst @@ -219,7 +219,13 @@ You can install all requirements of the development setup with: $ python3 -m venv .venv $ source .venv/bin/activate - $ pip install 
-e . + $ pip install -e .[dev] + +If you want to make use of the flake8 pre-commit hook, enable it with: + +.. code-block:: bash + + $ pre-commit install Running tests ~~~~~~~~~~~~~ diff --git a/setup.py b/setup.py index 96e074f..f320039 100644 --- a/setup.py +++ b/setup.py @@ -17,6 +17,9 @@ def read(fname): url='https://github.com/DjangoAdminHackers/django-linkcheck', include_package_data=True, install_requires=['django', 'requests'], + extras_require={ + "dev": ["flake8", "pre-commit"], + }, classifiers=[ 'Development Status :: 5 - Production/Stable', 'Environment :: Web Environment', From aad5892db240d36be6f7d40915a8e2b29888e9ac Mon Sep 17 00:00:00 2001 From: Timo Ludwig Date: Mon, 21 Nov 2022 16:29:46 +0100 Subject: [PATCH 098/188] Fix flake8 warnings --- linkcheck/__init__.py | 8 ++--- linkcheck/apps.py | 8 ++--- linkcheck/cron.py | 25 ++++++++++------ linkcheck/dashboard.py | 2 +- linkcheck/filebrowser.py | 26 +++++++++++----- linkcheck/listeners.py | 6 ++-- .../management/commands/checkexternal.py | 13 ++++---- .../management/commands/checkinternal.py | 3 +- linkcheck/management/commands/checklinks.py | 12 +++++--- linkcheck/management/commands/findlinks.py | 5 +++- linkcheck/models.py | 15 +++++----- linkcheck/tests/sampleapp/models.py | 2 +- linkcheck/tests/test_linkcheck.py | 30 +++++++++++++------ linkcheck/tests/urls.py | 5 +++- linkcheck/utils.py | 30 ++++++++++++------- linkcheck/views.py | 7 +++-- setup.py | 2 +- 17 files changed, 128 insertions(+), 71 deletions(-) diff --git a/linkcheck/__init__.py b/linkcheck/__init__.py index ba85208..ba0949e 100644 --- a/linkcheck/__init__.py +++ b/linkcheck/__init__.py @@ -47,7 +47,7 @@ class ImageLister(Lister): def handle_starttag(self, tag, attrs): if tag == 'img': - src = [v for k, v in attrs if k=='src'] + src = [v for k, v in attrs if k == 'src'] if src: self.urls.append(('', src[0])) @@ -62,17 +62,17 @@ def reset(self): self.names = [] def handle_starttag(self, tag, attributes): - name = [v for 
k, v in attributes if k=='id'] + name = [v for k, v in attributes if k == 'id'] if name: self.names.append(name[0]) if tag == 'a': - name = [v for k, v in attributes if k=='name'] + name = [v for k, v in attributes if k == 'name'] if name: self.names.append(name[0]) def parse(obj, field, parser): - html = getattr(obj,field) + html = getattr(obj, field) if html: parser.feed(html) parser.close() diff --git a/linkcheck/apps.py b/linkcheck/apps.py index 5de8d7c..3b82cd3 100644 --- a/linkcheck/apps.py +++ b/linkcheck/apps.py @@ -29,10 +29,10 @@ def build_linklists(self): if k in self.all_linklists.keys(): raise AlreadyRegistered(f'The key {k} is already registered in all_linklists') - for l in the_module.linklists.values(): - for l2 in self.all_linklists.values(): - if l.model == l2.model: - raise AlreadyRegistered(f'The LinkList {l} is already registered in all_linklists') + for link_list in the_module.linklists.values(): + for link_list2 in self.all_linklists.values(): + if link_list.model == link_list2.model: + raise AlreadyRegistered(f"The LinkList {link_list} is already registered in all_linklists") self.all_linklists.update(the_module.linklists) except AttributeError: pass diff --git a/linkcheck/cron.py b/linkcheck/cron.py index 36fe673..c78ebf3 100644 --- a/linkcheck/cron.py +++ b/linkcheck/cron.py @@ -20,29 +20,36 @@ class RunLinkCheckFind(Job): - run_every = WEEK + run_every = WEEK + + def job(self): + find_all_links() - def job(self): - find_all_links() cronScheduler.register(RunLinkCheckFind) class RunLinkCheckInternal(Job): - run_every = WEEK + run_every = WEEK + + def job(self): + check_links(limit=MAX_CHECKS_PER_RUN, check_external=False) - def job(self): - check_links(limit=MAX_CHECKS_PER_RUN, check_external=False) cronScheduler.register(RunLinkCheckInternal) class RunLinkCheckExternal(Job): - run_every = WEEK + run_every = WEEK + + def job(self): + check_links( + external_recheck_interval=EXTERNAL_RECHECK_INTERVAL, + limit=MAX_CHECKS_PER_RUN, + 
check_internal=False, + ) - def job(self): - check_links(external_recheck_interval=EXTERNAL_RECHECK_INTERVAL, limit=MAX_CHECKS_PER_RUN, check_internal=False) cronScheduler.register(RunLinkCheckExternal) diff --git a/linkcheck/dashboard.py b/linkcheck/dashboard.py index b733c5d..bc8cb54 100644 --- a/linkcheck/dashboard.py +++ b/linkcheck/dashboard.py @@ -13,4 +13,4 @@ {'title': 'Untested links', 'url': reverse('linkcheck_report') + '?filters=show_unchecked'}, {'title': 'Ignored links', 'url': reverse('linkcheck_report') + '?filters=ignored'}, ) -) \ No newline at end of file +) diff --git a/linkcheck/filebrowser.py b/linkcheck/filebrowser.py index 7f74a1a..b92bbc5 100644 --- a/linkcheck/filebrowser.py +++ b/linkcheck/filebrowser.py @@ -29,8 +29,11 @@ def handle_upload(sender, path=None, **kwargs): url_qs = Url.objects.filter(url=url).filter(status=False) count = url_qs.count() if count: - url_qs.update(status=True, message='Working document link') - msg = f"Please note. Uploading {url} has corrected {count} broken link{count > 1 and 's' or ''}. See the Link Manager for more details" + url_qs.update(status=True, message="Working document link") + msg = ( + f"Please note. Uploading {url} has corrected {count} broken link{count > 1 and 's' or ''}. " + "See the Link Manager for more details" + ) messages.info(sender, msg) @@ -51,8 +54,11 @@ def isdir(filename): old_url_qs = Url.objects.filter(url__startswith=old_url).filter(status=True) old_count = old_url_qs.count() if old_count: - old_url_qs.update(status=False, message='Missing Document') - msg = f"Warning. Renaming {old_url} has caused {old_count} link{old_count > 1 and 's' or ''} to break. Please use the Link Manager to fix them" + old_url_qs.update(status=False, message="Missing Document") + msg = ( + f"Warning. Renaming {old_url} has caused {old_count} link{old_count > 1 and 's' or ''} to break. 
" + "Please use the Link Manager to fix them" + ) messages.info(sender, msg) # The new directory may fix some invalid links, so we also check for that @@ -68,7 +74,10 @@ def isdir(filename): if new_count: new_url_qs.update(status=True, message='Working document link') if new_count: - msg = f"Please note. Renaming {new_url} has corrected {new_count} broken link{new_count > 1 and 's' or ''}. See the Link Manager for more details" + msg = ( + f"Please note. Renaming {new_url} has corrected {new_count} broken link{new_count > 1 and 's' or ''}. " + "See the Link Manager for more details" + ) messages.info(sender, msg) @@ -78,8 +87,11 @@ def handle_delete(sender, path=None, **kwargs): url_qs = Url.objects.filter(url=url).filter(status=True) count = url_qs.count() if count: - url_qs.update(status=False, message='Missing Document') - msg = f"Warning. Deleting {url} has caused {count} link{count > 1 and 's' or ''} to break. Please use the Link Manager to fix them" + url_qs.update(status=False, message="Missing Document") + msg = ( + f"Warning. Deleting {url} has caused {count} link{count > 1 and 's' or ''} to break. 
" + "Please use the Link Manager to fix them" + ) messages.info(sender, msg) diff --git a/linkcheck/listeners.py b/linkcheck/listeners.py index 1a84e9e..100ce2c 100644 --- a/linkcheck/listeners.py +++ b/linkcheck/listeners.py @@ -79,7 +79,7 @@ def do_check_instance_links(sender, instance, wait=False): new_links = [] old_links = Link.objects.filter(content_type=content_type, object_id=instance.pk) - linklists = linklist_cls().get_linklist(extra_filter={'pk':instance.pk,}) + linklists = linklist_cls().get_linklist(extra_filter={'pk': instance.pk}) if not linklists: # This object is no longer watched by linkcheck according to object_filter @@ -102,7 +102,9 @@ def do_check_instance_links(sender, instance, wait=False): continue u, created = Url.objects.get_or_create(url=url) - l, created = Link.objects.get_or_create(url=u, field=link[0], text=link[1], content_type=content_type, object_id=instance.pk) + l, created = Link.objects.get_or_create( + url=u, field=link[0], text=link[1], content_type=content_type, object_id=instance.pk + ) new_links.append(l.id) if internal_hash: setattr(u, '_internal_hash', internal_hash) diff --git a/linkcheck/management/commands/checkexternal.py b/linkcheck/management/commands/checkexternal.py index dac77fc..92afe9e 100644 --- a/linkcheck/management/commands/checkexternal.py +++ b/linkcheck/management/commands/checkexternal.py @@ -10,12 +10,16 @@ class Command(BaseCommand): help = 'Check and record external link status' def add_arguments(self, parser): - parser.add_argument('-e', '--externalinterval', type=int, + parser.add_argument( + '-e', '--externalinterval', type=int, help='Specifies the length of time in minutes until external links are rechecked. ' - 'Defaults to linkcheck_config setting') - parser.add_argument('-l', '--limit', type=int, + 'Defaults to linkcheck_config setting' + ) + parser.add_argument( + '-l', '--limit', type=int, help='Specifies the maximum number (int) of links to be checked. 
' - 'Defaults to linkcheck_config setting. Value less than 1 will check all') + 'Defaults to linkcheck_config setting. Value less than 1 will check all' + ) def handle(self, *args, **options): externalinterval = options['externalinterval'] or EXTERNAL_RECHECK_INTERVAL @@ -27,4 +31,3 @@ def handle(self, *args, **options): check_count = check_links(external_recheck_interval=externalinterval, limit=limit, check_internal=False) return "%s external URLs have been checked." % (check_count) - diff --git a/linkcheck/management/commands/checkinternal.py b/linkcheck/management/commands/checkinternal.py index 4922110..b1aaae6 100644 --- a/linkcheck/management/commands/checkinternal.py +++ b/linkcheck/management/commands/checkinternal.py @@ -9,7 +9,8 @@ class Command(BaseCommand): help = 'Check and record internal link status' def add_arguments(self, parser): - parser.add_argument('-l', '--limit', type=int, + parser.add_argument( + '-l', '--limit', type=int, help='Specifies the maximum number (int) of links to be checked. ' 'Defaults to linkcheck_config setting. Value less than 1 will check all') diff --git a/linkcheck/management/commands/checklinks.py b/linkcheck/management/commands/checklinks.py index bb9a542..9d877f5 100644 --- a/linkcheck/management/commands/checklinks.py +++ b/linkcheck/management/commands/checklinks.py @@ -9,12 +9,16 @@ class Command(BaseCommand): help = 'Check and record internal and external link status' def add_arguments(self, parser): - parser.add_argument('-e', '--externalinterval', type=int, + parser.add_argument( + '-e', '--externalinterval', type=int, help='Specifies the length of time in minutes until external links are rechecked. ' - 'Defaults to linkcheck_config setting') - parser.add_argument('-l', '--limit', type=int, + 'Defaults to linkcheck_config setting' + ) + parser.add_argument( + '-l', '--limit', type=int, help='Specifies the maximum number (int) of links to be checked. ' - 'Defaults to linkcheck_config setting. 
Value less than 1 will check all') + 'Defaults to linkcheck_config setting. Value less than 1 will check all' + ) def handle(self, *args, **options): externalinterval = options['externalinterval'] or EXTERNAL_RECHECK_INTERVAL diff --git a/linkcheck/management/commands/findlinks.py b/linkcheck/management/commands/findlinks.py index 316a608..eb6ce73 100644 --- a/linkcheck/management/commands/findlinks.py +++ b/linkcheck/management/commands/findlinks.py @@ -5,7 +5,10 @@ class Command(BaseCommand): - help = "Goes through all models registered with Linkcheck, records any new links found and removes all outdated links" + help = ( + "Goes through all models registered with Linkcheck, records any new links found" + "and removes all outdated links" + ) def handle(self, *args, **options): self.stdout.write("Updating all links...") diff --git a/linkcheck/models.py b/linkcheck/models.py index 8dd44b5..3cd4c3b 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -61,7 +61,8 @@ class Url(models.Model): Represents a distinct URL found somewhere in the models registered with linkcheck A single Url can have multiple Links associated with it. """ - url = models.CharField(max_length=MAX_URL_LENGTH, unique=True) # See http://www.boutell.com/newfaq/misc/urllength.html + # See http://www.boutell.com/newfaq/misc/urllength.html + url = models.CharField(max_length=MAX_URL_LENGTH, unique=True) last_checked = models.DateTimeField(blank=True, null=True) status = models.BooleanField(null=True) message = models.CharField(max_length=1024, blank=True, null=True) @@ -180,8 +181,9 @@ def _check_internal(self, tested_url): self.message = 'Link to within the same page (not automatically checked)' elif tested_url.startswith(MEDIA_PREFIX): - # TODO Assumes a direct mapping from media url to local filesystem path. 
This will break quite easily for alternate setups - path = settings.MEDIA_ROOT + unquote(tested_url)[len(MEDIA_PREFIX)-1:] + # TODO: Assumes a direct mapping from media url to local filesystem path. + # This will break quite easily for alternate setups + path = settings.MEDIA_ROOT + unquote(tested_url)[len(MEDIA_PREFIX) - 1:] decoded_path = html_decode(path) if os.path.exists(path) or os.path.exists(decoded_path): self.message = 'Working file link' @@ -191,11 +193,9 @@ def _check_internal(self, tested_url): elif getattr(self, '_internal_hash', False) and getattr(self, '_instance', None): # This is a hash link pointing to itself - from linkcheck import parse_anchors - hash = self._internal_hash instance = self._instance - if hash == '#': # special case, point to # + if hash == '#': # special case, point to # self.message = 'Working internal hash anchor' self.status = True else: @@ -265,7 +265,7 @@ def _check_external(self, tested_url, external_recheck_interval): request_params = { 'allow_redirects': True, - 'headers': {'User-Agent' : f"http://{settings.SITE_DOMAIN} Linkchecker"}, + 'headers': {'User-Agent': f"http://{settings.SITE_DOMAIN} Linkchecker"}, 'timeout': LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT, } try: @@ -321,6 +321,7 @@ def _check_anchor(self, anchor, html, internal=True): if not TOLERATE_BROKEN_ANCHOR: self.status = False + class Link(models.Model): """ A Link represents a specific URL in a specific field in a specific model diff --git a/linkcheck/tests/sampleapp/models.py b/linkcheck/tests/sampleapp/models.py index e53e253..76fc9fc 100644 --- a/linkcheck/tests/sampleapp/models.py +++ b/linkcheck/tests/sampleapp/models.py @@ -2,7 +2,7 @@ class Book(models.Model): - title = models.CharField(max_length=50) + title = models.CharField(max_length=50) description = models.TextField() def get_absolute_url(http://github.com/self): diff --git a/linkcheck/tests/test_linkcheck.py b/linkcheck/tests/test_linkcheck.py index 48a3077..85727a0 100644 --- 
a/linkcheck/tests/test_linkcheck.py +++ b/linkcheck/tests/test_linkcheck.py @@ -283,7 +283,10 @@ def test_repr(self): ) self.assertEqual( repr(Link.objects.first()), - ", source: )>", + ( + ", " + "source: )>" + ), ) @@ -340,13 +343,20 @@ def test_found_links(self): def test_urls_exceeding_max_length(self): self.assertEqual(Url.objects.all().count(), 0) with self.assertLogs(logger="linkcheck", level="WARN") as cm: - Book.objects.create(title='My Title', description=f""" - Here's a link: Example, - and here's a url exceeding the max length: logo""") + Book.objects.create( + title="My Title", + description=( + "Here's a link: Example, and here's a url exceeding " + f"the max length: logo" + ), + ) # We skip urls which are too long because we can't store them in the database self.assertIn( - f"WARNING:linkcheck.listeners:URL exceeding max length will be skipped: http://www.example.org/{MAX_URL_LENGTH * 'X'}", - cm.output + ( + "WARNING:linkcheck.listeners:URL exceeding max length will be skipped: " + f"http://www.example.org/%7BMAX_URL_LENGTH%20*'X'}" + ), + cm.output, ) self.assertEqual(Url.objects.all().count(), 1) @@ -498,12 +508,14 @@ def setUp(self): User.objects.create_superuser('admin', 'admin@example.org', 'password') def test_display_url(http://github.com/self): - Book.objects.create(title='My Title', description="""Here's a link: Example""") + Book.objects.create( + title='My Title', description="Here's a link: Example" + ) Author.objects.create(name="John Smith", website="http://www.example.org/#john") self.assertEqual(Link.objects.count(), 2) self.assertEqual( - set([l.display_url for l in Link.objects.all()]), - set(['http://www.example.org/', 'http://www.example.org/#john']) + set([link.display_url for link in Link.objects.all()]), + set(["http://www.example.org/", "http://www.example.org/#john"]), ) def test_report_view(self): diff --git a/linkcheck/tests/urls.py b/linkcheck/tests/urls.py 
index bb0eeb1..68ee438 100644 --- a/linkcheck/tests/urls.py +++ b/linkcheck/tests/urls.py @@ -5,7 +5,10 @@ from linkcheck.tests.sampleapp import views -handler404 = lambda *args, **kwargs: http.HttpResponseNotFound('') + +def handler404(*args, **kwargs): + return http.HttpResponseNotFound("") + urlpatterns = [ path('admin/linkcheck/', include('linkcheck.urls')), diff --git a/linkcheck/utils.py b/linkcheck/utils.py index 6c30d79..8886458 100644 --- a/linkcheck/utils.py +++ b/linkcheck/utils.py @@ -18,13 +18,14 @@ class LinkCheckHandler(ClientHandler): # Customize the ClientHandler to allow us removing some middlewares def load_middleware(self): - self.ignore_keywords = ['reversion.middleware','MaintenanceModeMiddleware'] + self.ignore_keywords = ['reversion.middleware', 'MaintenanceModeMiddleware'] super().load_middleware() new_request_middleware = [] - #############################_request_middleware################################# - # _request_middleware is removed in newer django. - if getattr(self, '_request_middleware', None): + #################################################### + # _request_middleware (is removed in newer django) # + #################################################### + if getattr(self, "_request_middleware", None): for method in self._request_middleware: ignored = False for keyword in self.ignore_keywords: @@ -35,7 +36,9 @@ def load_middleware(self): new_request_middleware.append(method) self._request_middleware = new_request_middleware - #############################_view_middleware################################# + #################### + # _view_middleware # + #################### new_view_middleware = [] for method in self._view_middleware: ignored = False @@ -47,8 +50,10 @@ def load_middleware(self): new_view_middleware.append(method) self._view_middleware = new_view_middleware - #############################_response_middleware################################# - if getattr(self, '_response_middleware', None): + 
########################## + # _response_middleware## # + ########################## + if getattr(self, "_response_middleware", None): new_response_middleware = [] for method in self._response_middleware: ignored = False @@ -60,9 +65,10 @@ def load_middleware(self): new_response_middleware.append(method) self._response_middleware = new_response_middleware - - #############################_template_response_middleware################################# - if getattr(self, '_template_response_middleware', None): + ################################# + # _template_response_middleware # + ################################# + if getattr(self, "_template_response_middleware", None): new_template_response_middleware = [] for method in self._template_response_middleware: ignored = False @@ -74,7 +80,9 @@ def load_middleware(self): new_template_response_middleware.append(method) self._template_response_middleware = new_template_response_middleware - #############################_exception_middleware################################# + ######################### + # _exception_middleware # + ######################### new_exception_middleware = [] for method in self._exception_middleware: ignored = False diff --git a/linkcheck/views.py b/linkcheck/views.py index 5406ae6..661da0d 100644 --- a/linkcheck/views.py +++ b/linkcheck/views.py @@ -96,8 +96,8 @@ def report(request): paginated_links = Paginator(qset, RESULTS_PER_PAGE, 0, True) try: - page = int(request.GET.get('page', '1')) - except: + page = int(request.GET.get("page", "1")) + except ValueError: page = 0 # offset = (page - 1) * RESULTS_PER_PAGE links = paginated_links.page(page) @@ -130,7 +130,8 @@ def report(request): objects.append({ 'object': object, - 'link_list': Link.objects.in_bulk([x['id'] for x in og]).values(), # Convert values_list back to queryset. Do we need to get values() or do we just need a list of ids? + # Convert values_list back to queryset. Do we need to get values() or do we just need a list of ids? 
+ 'link_list': Link.objects.in_bulk([x['id'] for x in og]).values(), 'admin_url': admin_url, }) content_types_list.append({ diff --git a/setup.py b/setup.py index f320039..cf3e5b6 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ import os -from setuptools import find_packages, setup +from setuptools import setup def read(fname): return open(os.path.join(os.path.dirname(__file__), fname)).read() From ef25860191ea833ea54bf90d0485ccdd3fbf570c Mon Sep 17 00:00:00 2001 From: Timo Ludwig Date: Sun, 20 Nov 2022 22:47:46 +0100 Subject: [PATCH 099/188] Add isort - Add requirement extra for devs - Add isort GitHub action to check formatting of imports - Add isort pre-commit hook --- .github/workflows/linting.yml | 8 ++++++++ .pre-commit-config.yaml | 5 +++++ README.rst | 4 ++-- setup.py | 3 ++- 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 42fd34f..bd578d0 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -16,3 +16,11 @@ jobs: run: pip install flake8 - name: Run flake8 run: flake8 --max-line-length=120 linkcheck + isort: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + - uses: jamescurtin/isort-action@master + with: + configuration: --multi-line=3 --trailing-comma --check-only diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f6b4d33..7446c2b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,3 +4,8 @@ repos: hooks: - id: flake8 args: [--max-line-length=120] + - repo: https://github.com/PyCQA/isort + rev: 5.10.1 + hooks: + - id: isort + args: [--multi-line=3, --trailing-comma] diff --git a/README.rst b/README.rst index 62202c1..ef760c4 100644 --- a/README.rst +++ b/README.rst @@ -213,7 +213,7 @@ If django-filebrowser is present on your path then linkcheck will listen to the Contributing ------------ -You can install all requirements of the development setup with: 
+You can install all requirements of the development setup with the extra ``dev``: .. code-block:: bash @@ -221,7 +221,7 @@ You can install all requirements of the development setup with: $ source .venv/bin/activate $ pip install -e .[dev] -If you want to make use of the flake8 pre-commit hook, enable it with: +If you want to make use of the flake8 and isort pre-commit hooks, enable them with: .. code-block:: bash diff --git a/setup.py b/setup.py index cf3e5b6..13790bf 100644 --- a/setup.py +++ b/setup.py @@ -2,6 +2,7 @@ from setuptools import setup + def read(fname): return open(os.path.join(os.path.dirname(__file__), fname)).read() @@ -18,7 +19,7 @@ def read(fname): include_package_data=True, install_requires=['django', 'requests'], extras_require={ - "dev": ["flake8", "pre-commit"], + "dev": ["flake8", "isort", "pre-commit"], }, classifiers=[ 'Development Status :: 5 - Production/Stable', From a6599a61c11026c3fb394ba9f213a21fea111d30 Mon Sep 17 00:00:00 2001 From: Timo Ludwig Date: Sun, 20 Nov 2022 22:47:59 +0100 Subject: [PATCH 100/188] Apply isort --- examples/linklists.py | 3 ++- linkcheck/cron.py | 14 ++++++-------- linkcheck/dashboard.py | 3 +-- linkcheck/filebrowser.py | 8 +++++--- linkcheck/listeners.py | 6 +++--- linkcheck/management/commands/checkexternal.py | 6 ++++-- linkcheck/management/commands/checkinternal.py | 2 +- linkcheck/management/commands/checklinks.py | 5 ++++- linkcheck/migrations/0001_initial.py | 2 +- linkcheck/migrations/0002_url_redirect_to.py | 2 +- linkcheck/models.py | 17 ++++++++--------- linkcheck/tests/sampleapp/linklists.py | 3 ++- linkcheck/tests/sampleapp/views.py | 7 ++++++- linkcheck/tests/test_linkcheck.py | 12 ++++++++---- linkcheck/utils.py | 12 ++++++++---- linkcheck/views.py | 2 +- runtests.py | 3 +-- 17 files changed, 62 insertions(+), 45 deletions(-) diff --git a/examples/linklists.py b/examples/linklists.py index 1181eb8..a5f600f 100644 --- a/examples/linklists.py +++ b/examples/linklists.py @@ -1,6 +1,7 @@ -from 
linkcheck import Linklist from cms.models import Page +from linkcheck import Linklist + class PageLinklist(Linklist): diff --git a/linkcheck/cron.py b/linkcheck/cron.py index c78ebf3..ec30a0b 100644 --- a/linkcheck/cron.py +++ b/linkcheck/cron.py @@ -7,15 +7,13 @@ # # Links are checked via signals any time a link-containing object is saved by Django -from django_cron import cronScheduler -from django_cron import Job -from django_cron import WEEK +from django_cron import WEEK, Job, cronScheduler -from linkcheck.utils import check_links -from linkcheck.utils import find_all_links - -from linkcheck.linkcheck_settings import EXTERNAL_RECHECK_INTERVAL -from linkcheck.linkcheck_settings import MAX_CHECKS_PER_RUN +from linkcheck.linkcheck_settings import ( + EXTERNAL_RECHECK_INTERVAL, + MAX_CHECKS_PER_RUN, +) +from linkcheck.utils import check_links, find_all_links class RunLinkCheckFind(Job): diff --git a/linkcheck/dashboard.py b/linkcheck/dashboard.py index bc8cb54..721bad3 100644 --- a/linkcheck/dashboard.py +++ b/linkcheck/dashboard.py @@ -1,8 +1,7 @@ from admin_tools.dashboard import modules -from linkcheck.views import get_status_message - from django.urls import reverse +from linkcheck.views import get_status_message linkcheck_dashboard_module = modules.LinkList( title="Linkchecker", diff --git a/linkcheck/filebrowser.py b/linkcheck/filebrowser.py index b92bbc5..bdbe86b 100644 --- a/linkcheck/filebrowser.py +++ b/linkcheck/filebrowser.py @@ -5,10 +5,12 @@ from django.contrib import messages try: - from filebrowser.views import filebrowser_post_upload - from filebrowser.views import filebrowser_post_rename - from filebrowser.views import filebrowser_post_delete from filebrowser.settings import DIRECTORY + from filebrowser.views import ( + filebrowser_post_delete, + filebrowser_post_rename, + filebrowser_post_upload, + ) FILEBROWSER_PRESENT = True except ImportError: FILEBROWSER_PRESENT = False diff --git a/linkcheck/listeners.py b/linkcheck/listeners.py index 
100ce2c..14a783d 100644 --- a/linkcheck/listeners.py +++ b/linkcheck/listeners.py @@ -8,10 +8,10 @@ from django.apps import apps from django.db.models import signals as model_signals -from . import filebrowser -from . import update_lock +from linkcheck.models import Link, Url + +from . import filebrowser, update_lock from .linkcheck_settings import MAX_URL_LENGTH -from linkcheck.models import Url, Link logger = logging.getLogger(__name__) diff --git a/linkcheck/management/commands/checkexternal.py b/linkcheck/management/commands/checkexternal.py index 92afe9e..a8c32f3 100644 --- a/linkcheck/management/commands/checkexternal.py +++ b/linkcheck/management/commands/checkexternal.py @@ -1,8 +1,10 @@ from django.core.management.base import BaseCommand +from linkcheck.linkcheck_settings import ( + EXTERNAL_RECHECK_INTERVAL, + MAX_CHECKS_PER_RUN, +) from linkcheck.utils import check_links -from linkcheck.linkcheck_settings import EXTERNAL_RECHECK_INTERVAL -from linkcheck.linkcheck_settings import MAX_CHECKS_PER_RUN class Command(BaseCommand): diff --git a/linkcheck/management/commands/checkinternal.py b/linkcheck/management/commands/checkinternal.py index b1aaae6..551c31e 100644 --- a/linkcheck/management/commands/checkinternal.py +++ b/linkcheck/management/commands/checkinternal.py @@ -1,7 +1,7 @@ from django.core.management.base import BaseCommand -from linkcheck.utils import check_links from linkcheck.linkcheck_settings import MAX_CHECKS_PER_RUN +from linkcheck.utils import check_links class Command(BaseCommand): diff --git a/linkcheck/management/commands/checklinks.py b/linkcheck/management/commands/checklinks.py index 9d877f5..c97f407 100644 --- a/linkcheck/management/commands/checklinks.py +++ b/linkcheck/management/commands/checklinks.py @@ -1,6 +1,9 @@ from django.core.management.base import BaseCommand -from linkcheck.linkcheck_settings import EXTERNAL_RECHECK_INTERVAL, MAX_CHECKS_PER_RUN +from linkcheck.linkcheck_settings import ( + EXTERNAL_RECHECK_INTERVAL, + 
MAX_CHECKS_PER_RUN, +) from linkcheck.utils import check_links diff --git a/linkcheck/migrations/0001_initial.py b/linkcheck/migrations/0001_initial.py index 4a5e40e..6996af0 100644 --- a/linkcheck/migrations/0001_initial.py +++ b/linkcheck/migrations/0001_initial.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -from django.db import models, migrations +from django.db import migrations, models class Migration(migrations.Migration): diff --git a/linkcheck/migrations/0002_url_redirect_to.py b/linkcheck/migrations/0002_url_redirect_to.py index 560dd73..81edd76 100644 --- a/linkcheck/migrations/0002_url_redirect_to.py +++ b/linkcheck/migrations/0002_url_redirect_to.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -from django.db import models, migrations +from django.db import migrations, models class Migration(migrations.Migration): diff --git a/linkcheck/models.py b/linkcheck/models.py index 3cd4c3b..bab8992 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -1,13 +1,10 @@ -import re +import logging import os.path - +import re from datetime import timedelta -import logging -import requests -from requests.exceptions import ConnectionError, ReadTimeout -from requests.models import REDIRECT_STATI from urllib.parse import unquote +import requests from django.conf import settings from django.contrib.contenttypes.fields import GenericForeignKey from django.contrib.contenttypes.models import ContentType @@ -16,6 +13,8 @@ from django.test.utils import modify_settings from django.utils.encoding import iri_to_uri from django.utils.timezone import now +from requests.exceptions import ConnectionError, ReadTimeout +from requests.models import REDIRECT_STATI try: from reversion.revisions import revision_context_manager @@ -24,12 +23,12 @@ USE_REVERSION = False from .linkcheck_settings import ( + EXTERNAL_RECHECK_INTERVAL, + EXTERNAL_REGEX_STRING, + LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT, MAX_URL_LENGTH, 
MEDIA_PREFIX, SITE_DOMAINS, - EXTERNAL_REGEX_STRING, - EXTERNAL_RECHECK_INTERVAL, - LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT, TOLERATE_BROKEN_ANCHOR, ) diff --git a/linkcheck/tests/sampleapp/linklists.py b/linkcheck/tests/sampleapp/linklists.py index 9af5d85..80f579f 100644 --- a/linkcheck/tests/sampleapp/linklists.py +++ b/linkcheck/tests/sampleapp/linklists.py @@ -1,4 +1,5 @@ -from django.db.models import Subquery, OuterRef +from django.db.models import OuterRef, Subquery + from linkcheck import Linklist from linkcheck.tests.sampleapp.models import Author, Book, Journal diff --git a/linkcheck/tests/sampleapp/views.py b/linkcheck/tests/sampleapp/views.py index 3eba199..836d091 100644 --- a/linkcheck/tests/sampleapp/views.py +++ b/linkcheck/tests/sampleapp/views.py @@ -1,5 +1,10 @@ import time -from django.http import HttpResponse, HttpResponsePermanentRedirect, HttpResponseRedirect + +from django.http import ( + HttpResponse, + HttpResponsePermanentRedirect, + HttpResponseRedirect, +) def http_response(request, code): diff --git a/linkcheck/tests/test_linkcheck.py b/linkcheck/tests/test_linkcheck.py index 85727a0..633405f 100644 --- a/linkcheck/tests/test_linkcheck.py +++ b/linkcheck/tests/test_linkcheck.py @@ -1,7 +1,7 @@ +import os from datetime import datetime, timedelta from io import StringIO from unittest.mock import patch -import os from django.apps import apps from django.conf import settings @@ -11,12 +11,16 @@ from django.test.utils import override_settings from django.urls import reverse +from linkcheck.linkcheck_settings import MAX_URL_LENGTH from linkcheck.listeners import ( - enable_listeners, disable_listeners, linkcheck_worker, register_listeners, - tasks_queue, unregister_listeners, + disable_listeners, + enable_listeners, + linkcheck_worker, + register_listeners, + tasks_queue, + unregister_listeners, ) from linkcheck.models import Link, Url -from linkcheck.linkcheck_settings import MAX_URL_LENGTH from linkcheck.views import get_jquery_min_js from 
.sampleapp.models import Author, Book, Journal diff --git a/linkcheck/utils.py b/linkcheck/utils.py index 8886458..d178f5b 100644 --- a/linkcheck/utils.py +++ b/linkcheck/utils.py @@ -1,14 +1,18 @@ import logging +from datetime import timedelta + from django.apps import apps from django.db import models from django.test.client import ClientHandler from django.utils import timezone -from datetime import timedelta - +from .linkcheck_settings import ( + HTML_FIELD_CLASSES, + IMAGE_FIELD_CLASSES, + MAX_URL_LENGTH, + URL_FIELD_CLASSES, +) from .models import Link, Url -from .linkcheck_settings import MAX_URL_LENGTH, HTML_FIELD_CLASSES, IMAGE_FIELD_CLASSES, URL_FIELD_CLASSES - logger = logging.getLogger(__name__) diff --git a/linkcheck/views.py b/linkcheck/views.py index 661da0d..92b6db1 100644 --- a/linkcheck/views.py +++ b/linkcheck/views.py @@ -10,7 +10,7 @@ from django.http import HttpResponse from django.shortcuts import render from django.templatetags.static import static -from django.urls import reverse, NoReverseMatch +from django.urls import NoReverseMatch, reverse from django.views.decorators.csrf import csrf_exempt from linkcheck import update_lock diff --git a/runtests.py b/runtests.py index de1804c..c9e50d7 100644 --- a/runtests.py +++ b/runtests.py @@ -1,7 +1,6 @@ #!/usr/bin/env python import sys - -from os.path import dirname, abspath +from os.path import abspath, dirname import django from django.conf import settings From 789075804a6d8297ee5b8b7ecf7489b41520e142 Mon Sep 17 00:00:00 2001 From: Timo Ludwig Date: Thu, 24 Nov 2022 21:20:48 +0100 Subject: [PATCH 101/188] Fix type property for internal URLs When the URL started with the `EXTERNAL_REGEX`, its type was incorrectly given as "external", even though the check for internal URLs was used. 
Fixes #141 --- CHANGELOG | 1 + linkcheck/models.py | 83 ++++++++++++++++++++----------- linkcheck/tests/test_linkcheck.py | 9 ++++ 3 files changed, 65 insertions(+), 28 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index b392dbe..5cfae6b 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,6 @@ Unreleased +* Fix `type` property for internal URLs * Fix incorrect message when redirect has broken anchor (Timo Ludwig, #128) * Breaking change: Treat broken hash anchors as valid diff --git a/linkcheck/models.py b/linkcheck/models.py index bab8992..0b9d5ff 100644 --- a/linkcheck/models.py +++ b/linkcheck/models.py @@ -12,6 +12,7 @@ from django.test.client import Client from django.test.utils import modify_settings from django.utils.encoding import iri_to_uri +from django.utils.functional import cached_property from django.utils.timezone import now from requests.exceptions import ConnectionError, ReadTimeout from requests.models import REDIRECT_STATI @@ -69,7 +70,7 @@ class Url(models.Model): @property def type(self): - if EXTERNAL_REGEX.match(self.url): + if self.external: return 'external' if self.url.startswith('mailto'): return 'mailto' @@ -81,8 +82,10 @@ def type(self): return 'anchor' elif self.url.startswith(MEDIA_PREFIX): return 'file' + elif self.internal_url.startswith('/'): + return 'internal' else: - return 'unknown' + return 'invalid' @property def get_message(self): @@ -106,31 +109,27 @@ def __str__(self): def __repr__(self): return f"" - @property - def external(self): - return EXTERNAL_REGEX.match(self.url) - - def check_url(self, check_internal=True, check_external=True, external_recheck_interval=EXTERNAL_RECHECK_INTERVAL): - """ - Return: - * True if the link was checked and found valid - * False if the link was checked and found invalid - * None if the link was not checked + @cached_property + def internal_url(self): """ + Remove current domain from URLs as the test client chokes when trying to 
test them during a page save + They shouldn't generally exist but occasionally slip through + If settings.SITE_DOMAINS isn't set then use settings.SITE_DOMAIN + but also check for variants: example.org, www.example.org, test.example.org - self.status = False + In case the URLs is external, `None` is returned. + """ - # Remove current domain from URLs as the test client chokes when trying to test them during a page save - # They shouldn't generally exist but occasionally slip through - # If settings.SITE_DOMAINS isn't set then use settings.SITE_DOMAIN - # but also check for variants: example.org, www.example.org, test.example.org + # If the URL is not external, directly return it without processing + if not EXTERNAL_REGEX.match(self.url): + return self.url - tested_url = self.url # May receive transformation before being checked + # May receive transformation before being checked + prepared_url = self.url internal_exceptions = [] if SITE_DOMAINS: # If the setting is present internal_exceptions = SITE_DOMAINS - elif getattr(settings, 'SITE_DOMAIN', None): # try using SITE_DOMAIN root_domain = settings.SITE_DOMAIN if root_domain.startswith('www.'): @@ -138,21 +137,49 @@ def check_url(self, check_internal=True, check_external=True, external_recheck_i elif root_domain.startswith('test.'): root_domain = root_domain[5:] internal_exceptions = [ - 'http://' + root_domain, 'http://www.' + root_domain, 'http://test.' + root_domain, - 'https://' + root_domain, 'https://www.' + root_domain, 'https://test.' + root_domain, + f'{protocol}://{sub}{root_domain}' for sub in ['', 'www.', 'test.'] for protocol in ['http', 'https'] ] for ex in internal_exceptions: - if ex and tested_url.startswith(ex): - tested_url = tested_url.replace(ex, '', 1) + if ex and prepared_url.startswith(ex): + prepared_url = prepared_url.replace(ex, '', 1) + + # If the URL is still external, return `None` + if EXTERNAL_REGEX.match(prepared_url): + return None + + 
logger.debug('Internal URL: %s', prepared_url) + return prepared_url - external = bool(EXTERNAL_REGEX.match(tested_url)) + @property + def internal(self): + """ + Check whether this URL is internal + """ + return self.internal_url is not None + + @property + def external(self): + """ + Check whether this URL is external + """ + return not self.internal + + def check_url(self, check_internal=True, check_external=True, external_recheck_interval=EXTERNAL_RECHECK_INTERVAL): + """ + Return: + * True if the link was checked and found valid + * False if the link was checked and found invalid + * None if the link was not checked + """ + + self.status = False - if check_internal and not external: - self._check_internal(tested_url) + if check_internal and self.internal: + self._check_internal(self.internal_url) - elif check_external and external: - self._check_external(tested_url, external_recheck_interval) + elif check_external and self.external: + self._check_external(self.url, external_recheck_interval) else: return None diff --git a/linkcheck/tests/test_linkcheck.py b/linkcheck/tests/test_linkcheck.py index 633405f..b28c6ef 100644 --- a/linkcheck/tests/test_linkcheck.py +++ b/linkcheck/tests/test_linkcheck.py @@ -100,6 +100,15 @@ def test_internal_check_found(self): uv.check_url() self.assertEqual(uv.status, True) self.assertEqual(uv.message, 'Working internal link') + self.assertEqual(uv.type, 'internal') + + def test_internal_check_with_protocol(self): + # "localhost" is configured as SITE_DOMAIN in settings + uv = Url(url="http://localhost/public/") + uv.check_url() + self.assertEqual(uv.status, True) + self.assertEqual(uv.message, 'Working internal link') + self.assertEqual(uv.type, 'internal') def test_internal_check_broken_internal_link(self): uv = Url(url="/broken/internal/link") From 23783861b2d0cb87a002e87e65942006e77b3137 Mon Sep 17 00:00:00 2001 From: Timo Ludwig Date: Tue, 22 Nov 2022 14:07:53 +0100 Subject: [PATCH 
102/188] Make templates translatable --- .../0005_alter_link_id_alter_url_id.py | 27 ++++++++++++++ .../templates/linkcheck/base_linkcheck.html | 16 +++++++-- linkcheck/templates/linkcheck/coverage.html | 21 ++++++++--- .../{paginator.xhtml => paginator.html} | 19 +++++----- linkcheck/templates/linkcheck/report.html | 36 ++++++++++--------- .../linkcheck/suggested_configs.html | 2 +- .../templatetags/linkcheck_model_tags.py | 11 ++++++ linkcheck/tests/test_linkcheck.py | 2 +- linkcheck/views.py | 9 ++--- 9 files changed, 105 insertions(+), 38 deletions(-) create mode 100644 linkcheck/migrations/0005_alter_link_id_alter_url_id.py rename linkcheck/templates/linkcheck/{paginator.xhtml => paginator.html} (56%) create mode 100644 linkcheck/templatetags/linkcheck_model_tags.py diff --git a/linkcheck/migrations/0005_alter_link_id_alter_url_id.py b/linkcheck/migrations/0005_alter_link_id_alter_url_id.py new file mode 100644 index 0000000..2633e1d --- /dev/null +++ b/linkcheck/migrations/0005_alter_link_id_alter_url_id.py @@ -0,0 +1,27 @@ +# Generated by Django 4.1.3 on 2022-11-24 15:58 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("linkcheck", "0004_remove_url_still_exists"), + ] + + operations = [ + migrations.AlterField( + model_name="link", + name="id", + field=models.BigAutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + migrations.AlterField( + model_name="url", + name="id", + field=models.BigAutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ] diff --git a/linkcheck/templates/linkcheck/base_linkcheck.html b/linkcheck/templates/linkcheck/base_linkcheck.html index 926ceca..03bdcd8 100644 --- a/linkcheck/templates/linkcheck/base_linkcheck.html +++ b/linkcheck/templates/linkcheck/base_linkcheck.html @@ -1,8 +1,20 @@ {% extends "admin/change_list.html" %} -{% block breadcrumbs %}{% endblock %} +{% load i18n %} + +{% 
block title %} + {% translate "Link Checker" %} {{ block.super }} +{% endblock %} + +{% block breadcrumbs %} + +{% endblock %} + {% block content %}
-

Link Checker

+

{% translate "Link Checker" %}

{% block innercontent %} {% endblock %} diff --git a/linkcheck/templates/linkcheck/coverage.html b/linkcheck/templates/linkcheck/coverage.html index e81ca6b..0f1c4eb 100644 --- a/linkcheck/templates/linkcheck/coverage.html +++ b/linkcheck/templates/linkcheck/coverage.html @@ -1,20 +1,33 @@ {% extends "linkcheck/base_linkcheck.html" %} +{% load i18n %} {% load linkcheck_coverage_tags %} {% block extrahead %}{% endblock %} +{% block title %} + {% translate "Coverage" %} | {{ block.super }} +{% endblock %} + +{% block breadcrumbs %} + +{% endblock %} + {% block innercontent %} - - - + + + {% for model in coverage_data %}
ModelCoveredSuggested config{% translate "Model" %}{% translate "Covered" %}{% translate "Suggested config" %}
{{ model.name }} - {{ model.is_covered }} + {{ model.is_covered|yesno:_("Yes,No") }} {% if not model.is_covered %} diff --git a/linkcheck/templates/linkcheck/paginator.xhtml b/linkcheck/templates/linkcheck/paginator.html similarity index 56% rename from linkcheck/templates/linkcheck/paginator.xhtml rename to linkcheck/templates/linkcheck/paginator.html index 296d669..d48eb0d 100644 --- a/linkcheck/templates/linkcheck/paginator.xhtml +++ b/linkcheck/templates/linkcheck/paginator.html @@ -1,31 +1,32 @@ +{% load i18n %}
{% if pages.number > 1 %} < First {% else %} - < First + < {% translate "First" %} {% endif %} {% if pages.has_previous %} - << Previous + << {% translate "Previous" %} {% else %} - << Previous + << {% translate "Previous" %} {% endif %} - - Page {{ pages.number }} of {{ pages.paginator.num_pages }}. + + {% blocktrans with current=pages.number max=pages.paginator.num_pages %}Page {{ current }} of {{ max }}{% endblocktrans %} {% if pages.has_next %} - Next >> + {% translate "Next" %} >> {% else %} - Next >> + {% translate "Next" %} >> {% endif %} {% if pages.number != pages.paginator.num_pages %} - Last > + {% translate "Last" %} > {% else %} - Last > + {% translate "Last" %} > {% endif %}
diff --git a/linkcheck/templates/linkcheck/report.html b/linkcheck/templates/linkcheck/report.html index 827f995..419ef42 100644 --- a/linkcheck/templates/linkcheck/report.html +++ b/linkcheck/templates/linkcheck/report.html @@ -1,4 +1,6 @@ {% extends "linkcheck/base_linkcheck.html" %} +{% load i18n %} +{% load linkcheck_model_tags %} {% block extrahead %} {{ block.super }}