diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml deleted file mode 100644 index 7fdf3a1..0000000 --- a/.github/workflows/linting.yml +++ /dev/null @@ -1,26 +0,0 @@ -name: Linting - -on: - push: - branches: [ master ] - pull_request: - branches: [ master ] - -jobs: - flake8: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - - name: Install dependencies - run: pip install flake8 - - name: Run flake8 - run: flake8 --max-line-length=120 linkcheck - isort: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - - uses: jamescurtin/isort-action@master - with: - configuration: --multi-line=3 --trailing-comma --check-only diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml deleted file mode 100644 index 289608a..0000000 --- a/.github/workflows/test.yml +++ /dev/null @@ -1,43 +0,0 @@ -name: Test - -on: - push: - branches: [ master ] - pull_request: - branches: [ master ] - -jobs: - build: - runs-on: ubuntu-latest - strategy: - max-parallel: 5 - matrix: - python-version: ['3.10', '3.11', '3.12', '3.13', '3.14'] - django-version: ['4.2', '5.2', '6.0'] - exclude: - - python-version: '3.10' - django-version: '6.0' - - python-version: '3.11' - django-version: '6.0' - - python-version: '3.13' - django-version: '4.2' - - python-version: '3.14' - django-version: '4.2' - - steps: - - uses: actions/checkout@v4 - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install --upgrade django~=${{ matrix.django-version }}.0 - python -m pip install --upgrade requests - python -m pip install --upgrade requests_mock - - - name: Run tests - run: python runtests.py diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 61d9422..0000000 --- a/.gitignore +++ /dev/null 
@@ -1,58 +0,0 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] - -# C extensions -*.so - -# Distribution / packaging -.Python -.venv/ -env/ -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -*.egg-info/ -.installed.cfg -*.egg - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*,cover - -# Translations -*.mo -*.pot - -# Django stuff: -*.log - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml deleted file mode 100644 index 870722b..0000000 --- a/.pre-commit-config.yaml +++ /dev/null @@ -1,11 +0,0 @@ -repos: - - repo: https://github.com/pycqa/flake8 - rev: 6.1.0 - hooks: - - id: flake8 - args: [--max-line-length=120] - - repo: https://github.com/PyCQA/isort - rev: 5.12.0 - hooks: - - id: isort - args: [--multi-line=3, --trailing-comma] diff --git a/CHANGELOG b/CHANGELOG deleted file mode 100644 index 1b24a94..0000000 --- a/CHANGELOG +++ /dev/null @@ -1,227 +0,0 @@ -Unreleased - - * Add support for Django 6.0 - * Add support for Python 3.14 - * Remove support for Python 3.9 - -2.4.0 (2025-09-28) - -* Add index to Link (David Venhoff, #202) -* Add support for Django 5.1 and 5.2 -* Add support for Python 3.13 -* Remove support for Django < 4.2 -* Remove support for Python 3.8 -* Remove `raven_compat` middleware if present -* Updated fallback User Agent string -* Reference object primary key by `pk` instead of `id` (Ben Stockermans) -* More robust check for unsaved instances in `instance_pre_save` (Ben Stockermans) -* Add `LINKCHECK_PROXIES` and `LINKCHECK_TRUST_PROXY_SSL` settings to be able to - pass a proxy to requests when 
cheking links. (Ben Stockermans) -* Add `LINKCHECK_SKIP_TRANSLATIONS` env variable to skip translation compilation - during package building (Ben Stockermans) - -2.3.0 (2023-12-27) - -* Fix encoding of utf-8 domain names (Timo Brembeck, #190) -* Move coverage view to management command (Timo Brembeck, #187) - * Add new management command `linkcheck_suggest_config` - * Delete coverage view -* Improve formatting for `NameResolutionError` (Timo Brembeck, #192) -* Fix internal redirect checker (Timo Ludwig, #180) -* Fix SSL status of unreachable domains (Timo Ludwig, #184) -* Fix URL message for internal server errorrs (Timo Ludwig, #182) -* Add support for Django 4.2 and 5.0 -* Add support for Python 3.12 -* Remove support for Django 4.0 -* Remove support for Python 3.7 - -2.2.1 (2023-04-03) - -* Include the compiled translations in the released package (#177) - -2.2.0 (2023-04-01) - -* Enable internationalization for URL status messages (Timo Ludwig, #125) -* Enable re-checking after rate limit was hit (Timo Ludwig, #153) -* Ignore raw `post_save` signal (Timo Ludwig, #106) -* Retry with fallback user agent on forbidden response (Timo Ludwig, #159) -* Also set `redirect_to` on internal redirects (Timo Ludwig, #163) -* Add new fields to `Url` model: - * `status_code`: The HTTP status code of the initial request - * `redirect_status_code`: The HTTP status code of the final request - * `anchor_status`: The validity of the HTML hash anchor - * `ssl_status` The validity of the SSL certificate - * `error_message` The error message if the request failed -* Add new properties to `Url` model: - * `anchor_message`: The human-readable meaning of the `anchor_status` - * `ssl_message` The human-readable meaning of the `ssl_status` -* Add French translations. 
- -2.1.0 (2023-02-05) - -* Fix `SSL Error` for missing root certificates (Timo Ludwig, #157) -* Fix `NotImplementedError`/`AssertionError` when checking - video links with hash anchors (Timo Ludwig, #150) -* Skip checking of hash anchors for non-html files -* Avoid decorating the `report` view with `csrf_exempt` (#155) -* recheck/ignore/unignore requests were using an obsolete `request.is_ajax` call - (#147) - -2.0.0 (2022-12-17) - -* Add German translations for filebrowser integration -* Fix django-filebrowser integration (Timo Ludwig, #144) -* Use `django.db.models.BigAutoField` as default auto field - (Timo Ludwig, #137) -* Add German translations for the templates -* Fix `type` property for internal URLs (Timo Ludwig, #141) -* Fix incorrect message when redirect has broken anchor - (Timo Ludwig, #128) -* Breaking change: Treat broken hash anchors as valid - unless `LINKCHECK_TOLERATE_BROKEN_ANCHOR` is manually - set to `False` (Timo Ludwig, #98) -* Remove unused field `still_exists` from `Url` model -* Delete outdated `Url` and `Link` objects when - running `findlinks` command (Timo Ludwig, #101) -* Avoid crash when unexpected error in signal listener occurs - (Sven Seeberg, #117) -* Ignore Urls longer than `MAX_URL_LENGTH` in signal listeners - (Timo Ludwig, #115) -* Verify SSL certificates (Timo Ludwig, #118) -* Added support for Python 3.10/3.11 and Django 4.1. -* Dropped support for Python 3.6 and Django < 3.2. - -1.9.1 (2022-03-23) - -* Added `Linklist.filter_callable` optional hook to allow for more - flexible link list filtering (Giuliano Mele). - -1.9 (2021-12-23) - -* Added support for Django 3.2 and 4.0 and removed support for Django < 2.2. -* Ignore raw `pre_save` signal (Timo Ludwig, #106). - -1.8.1 (2021-04-01) - -* The 1.8 release contained unwanted temporary stuff and was - therefore a broken release. Many thanks to Stefan Borer for - noticing that. 
- -1.8 (2021-02-25) - -* Added explicit `listeners.register_listeners` and - `listeners.unregister_listeners` functions. -* Added `listeners.enable_listeners` and `listeners.disable_listeners` context - managers. -* Avoid crash when looking for anchors in response content. -* Avoid possible failures when checking internal links depending on - ALLOWED_HOSTS setting. -* Confirmed compatibility with Django 3.1. -* Dropped support for Python 3.4. - -1.7 (2020-01-13) - -* Dropped support for Python 2 and Django < 1.11. -* Added support for Django 3.0. -* Made more usage of the requests library. - -1.6 (2019-03-20) - -* Use requests library when getting 'certificate verify failed' errors. -* Fixed compatibility issues with newer versions of Django. -* Fixed pip installation issues with encoding errors (#87). - -1.5 (2017-09-16) - -* Added support for `tel:` links. -* For redirecting links, linkcheck now reports the status of the redirect - target (#78). -* Dropped South migrations. -* 'Url.redirect_to' was migrated to a TextField to not limit its length (#75). -* Fixed handling of the '--limit' argument of the 'checklinks' command (#73). -* Fixed the task queue of links to check (#69). - -1.4 (2017-01-13) - -* Dropped support for Django 1.6 and Django 1.7, the minimal Python version is - now Python 2.7. Django 1.10 is also supported. -* Listeners registration and post_delete signal are now happening in the app - config ready() method. This means that the process can be customized by - having custom AppConfig classes and referring to those classes in the - INSTALLED_APPS setting. -* A new DISABLE_LISTENERS setting has been added to ease deactivation of - listeners registration. -* A task queue is now used to process link checking, so as to prevent exhaustion - of available threads during massive updates. - -1.3 (2016-06-05) - -* Django 1.9 compatibility added. -* When checking internal links, redirects are not followed any longer. 
-* Added support for the django-admin-tools dashboard, if present. -* Fixed a bug where internal links were skipped based on the external interval - setting. -* Handle situations where content_type.model_class() returns None. -* Allow extra field types to be added via settings. (Used for coverage view only). -* Improve coverage suggested configs - include 'ignore_empty' settings plus a - raw code view via /linkcheck/coverage?config=1. -* Fix - correctly handle tags that are inside tags. -* Fix - don't run pre_save if it's a new instance. - -1.2 (2015-11-13) - -* Added migration folders (missing in the 1.1 package). Also added support for - South migrations (compatibility). -* When a link produces a 301 Moved Permanently redirection, the redirect target - is stored in Url.redirect_to and displayed in the link report. -* Better support for URLs containing non-ASCII characters. - -1.1 (2015-06-03) - -* Minimal software requirements are now Python 2.6 / Django 1.6 (and South 1.0 if - you still use Django 1.6). -* Python 3 is supported. -* Django 1.7 / 1.8 compatibility added. -* notifications.py is now based on django-admin-blocks. -* Linklist classes now support an ignore_empty list to ignore empty URLField values. - -1.0 - -Changes: - -Bug fixes since 0.6 Please see commit log here: https://github.com/andybak/django-linkcheck/commits/master - -0.6.0 - -Changes: - -* Support ignoring (and unignoring) external broken links via buttons in the linkcheck report -* 'Recheck' button in the linkcheck report -* External links with anchors were being reported as broken because we switched to a HEAD request -* One particular url caused a crash in urllib2 when doing a HEAD request. Implemented a workaround: catch the exception and run a normal GET -* Inconsistant use of seconds in some places and minutes in others. Switch to minutes for all parameters. 
-* Clean up CSS -* Use normal links for navigating between report types instead of javascript+radio buttons -* Removed some unused javascript -* Fixed some issues with anchor links -* Broken link notification count was counting urls rather than links -* Mark length of url field configurable for those not cursed with MySQl -* Remove the pointless disinguishing images/documents/other in Url.type -* Document settings properly -* Remove unused pagination tag from template and thus dependency on django-pagination -* All tests now pass ( because I commented out the one that didn't :-P ) - -0.5.0 - -Start this changelog -Added some more comments throughout -Fixed dependency on django-filebrowser by wrapping it in an exception check -Handle get_absolute_url returning None -Use HEAD requests for checking external URLs -Handle HREF="#" correctly -Cleaner display of hashtag links in reports -Handle 'Bad Status Line' responses from remote servers. -Don't spawn a thread if running from tests as this prevents the new thread from seeing the same database transaction as the parent thread -Fix some tests from prior to the big refactor. nb Tests are still incomplete and many are broken :( -document filebrowser dependency - diff --git a/LICENSE b/LICENSE deleted file mode 100644 index 839f8a1..0000000 --- a/LICENSE +++ /dev/null @@ -1,28 +0,0 @@ -Copyright (c) 2009-2010, Andy Baker and contributors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. 
- * Neither the name of the author nor the names of other - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index b979465..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,7 +0,0 @@ -include LICENSE -include CHANGELOG -include README.rst -include linkcheck/locale/*/LC_MESSAGES/django.mo -exclude linkcheck/locale/*/LC_MESSAGES/django.po -recursive-include linkcheck/templates/linkcheck * -recursive-include linkcheck/tests/media * diff --git a/README.rst b/README.rst deleted file mode 100644 index 4978fe2..0000000 --- a/README.rst +++ /dev/null @@ -1,319 +0,0 @@ - -django-linkcheck -=================== - -.. image:: https://github.com/DjangoAdminHackers/django-linkcheck/workflows/Test/badge.svg - :target: https://github.com/DjangoAdminHackers/django-linkcheck/actions - :alt: GitHub Actions - -.. image:: https://img.shields.io/pypi/v/django-linkcheck.svg - :alt: PyPI version - :target: https://pypi.org/project/django-linkcheck/ - -A fairly flexible app that will analyze and report on links in any model that -you register with it. - -.. 
image:: https://github.com/DjangoAdminHackers/django-linkcheck/raw/master/linkcheck.jpg - -Links can be bare (urls or image and file fields) or -embedded in HTML (linkcheck handles the parsing). It's fairly easy to override -methods of the Linkcheck object should you need to do anything more -complicated (like generate URLs from slug fields etc). - -You should run its management command via cron or similar to check external -links regularly to see if their status changes. All links are checked -automatically when objects are saved. This is handled by signals. - -Minimal requirements --------------------- - -django-linkcheck requires Python 3.10 and Django 4.2. - -Basic usage ------------ - -#. Install app to somewhere on your Python path (e.g. ``pip install - django-linkcheck``). If you do not need multilingual support, you can skip - the compilation of the translation files with an environment variable, e.g. - (``LINKCHECK_SKIP_TRANSLATIONS=true pip install django-linkcheck``). - -#. Add ``'linkcheck'`` to your ``settings.INSTALLED_APPS``. - -#. Add a file named ``linklists.py`` to every app (see an example in ``examples/linklists.py``) that either: - - #) has models that contain content (e.g. url/image fields, chunks of markup - or anything that gets transformed into a IMG or HREF when displayed - #) can be the target of a link - i.e. is addressed by a url - in this case - make sure it has an instance method named 'get_absolute_url' - - *Hint:* You can create a sample config for your model with:: - - manage.py linkcheck_suggest_config --model sampleapp.SampleModel > sampleapp/linklists.py - -#. Run ``./manage.py migrate``. - -#. Add to your root url config:: - - path('admin/linkcheck/', include('linkcheck.urls')) - -#. View ``/admin/linkcheck/`` from your browser. - -We are aware that this documentation is on the brief side of things so any -suggestions for elaboration or clarification would be gratefully accepted. 
- -Linklist classes ----------------- - -The following class attributes can be added to your ``Linklist`` subclasses to -customize the extracted links: - - ``object_filter``: a dictionary which will be passed as a filter argument to - the ``filter`` applied to the default queryset of the target class. This - allows you to filter the objects from which the links will be extracted. - (example: ``{'active': True}``) - - ``object_exclude``: a dictionary which will be passed as a filter argument to - the ``exclude`` applied to the default queryset of the target class. As with - ``object_filter``, this allows you to exclude objects from which the links - will be extracted. - - ``html_fields``: a list of field names which will be searched for links. - - ``url_fields``: a list of ``URLField`` field names whose content will be - considered as links. If the field content is empty and the field name is - in ``ignore_empty``, the content is ignored. - - ``ignore_empty``: a list of fields from ``url_fields``. See the explanation - above. (new in django-linkcheck 1.1) - - ``image_fields``: a list of ``ImageField`` field names whose content will be - considered as links. Empty ``ImageField`` content is always ignored. - - ``filter_callable``: a callable which allows to pass a function as filter - for your linklist class. It allows to apply more advanced filter operations. - This function must be a class method and it should be passed the objects query - set and return the filtered objects. - Example usage in your linklists.py - only check latest versions:: - - @classmethod - def filter_callable(cls, objects): - latest = Model.objects.filter(id=OuterRef('id')).order_by('-version') - return objects.filter(version=Subquery(latest.values('version')[:1])) - -Management commands -------------------- - -findlinks -~~~~~~~~~ - -This command goes through all registered fields and records the URLs it finds. -This command does not validate anything. 
Typically run just after installing -and configuring django-linkcheck. - -checklinks -~~~~~~~~~~ - -For each recorded URL, check and report the validity of the URL. All internal -links are checked, but only external links that have not been checked during -the last ``LINKCHECK_EXTERNAL_RECHECK_INTERVAL`` minutes are checked. This -interval can be adapted per-invocation by using the ``--externalinterval`` -(``-e``) command option (in minutes). - -You can also limit the maximum number of links to be checked by passing a number -to the ``--limit`` (``--l``) command option. - -linkcheck_suggest_config -~~~~~~~~~~~~~~~~~~~~~~~~ - -This command goes through all models and checks whether they contain fields that -can potentially be checked by linkcheck. -If they are not yet registered, a sample config is suggested. - -You can also pass the option ``--model`` to generate a sample config for the given model. - -Settings --------- - -LINKCHECK_DISABLE_LISTENERS -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -A setting to totally disable linkcheck, typically when running tests. See also -the context managers below. - -LINKCHECK_EXTERNAL_RECHECK_INTERVAL -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Default: 10080 (1 week in minutes) - -Will not recheck any external link that has been checked more recently than this value. - -LINKCHECK_EXTERNAL_REGEX_STRING -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Default: r'^https?://' - -A string applied as a regex to a URL to determine whether it's internal or external. - -LINKCHECK_MEDIA_PREFIX -~~~~~~~~~~~~~~~~~~~~~~ - -Default: '/media/' - -Currently linkcheck tests whether links to internal static media are correct by wrangling the URL to be a local filesystem path. - -It strips MEDIA_PREFIX off the interal link and concatenates the result onto settings.MEDIA_ROOT and tests that using os.path.exists - -This 'works for me' but it is probably going to break for other people's setups. Patches welcome. 
- -LINKCHECK_RESULTS_PER_PAGE -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Controls pagination. - -Pagination is slightly peculiar at the moment due to the way links are grouped by object. - - -LINKCHECK_MAX_URL_LENGTH -~~~~~~~~~~~~~~~~~~~~~~~~ - -Default: 255 - -The length of the URL field. Defaults to 255 for compatibility with MySQL (see http://docs.djangoproject.com/en/dev/ref/databases/#notes-on-specific-fields ) - - -LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Default: 10 - -The timeout in seconds for each connection attempts. Sometimes it is useful to limit check time per connection in order to hold at bay the total check time. - - -SITE_DOMAIN and LINKCHECK_SITE_DOMAINS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Linkcheck tests external and internal using differently. Internal links use the Django test client whereas external links are tested using urllib2. - -Testing internal links this as if they were external can cause errors in some circumstances so Linkcheck needs to know which external urls are to be treated as internal. - -Linkcheck looks for either of the settings above. It only uses SITE_DOMAIN if LINKCHECK_SITE_DOMAINS isn't present - - -SITE_DOMAIN = "mysite.com" - -would tell linkchecker to treat the following as internal links: - -mysite.com -www.mysite.com -test.mysite.com - -If you instead set LINKCHECK_SITE_DOMAINS to be a list or tuple then you can explicitly list the domains that should be treated as internal. - - -LINKCHECK_TOLERATE_BROKEN_ANCHOR -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Default: ``True`` - -Whether links with broken hash anchors should be marked as valid. -Disable this if you want that links to anchors which are not contained in the link target's HTML source are marked as invalid. - - -LINKCHECK_PROXIES -~~~~~~~~~~~~~~~~~ - -Default: `{}` - -Allows you to make your `check_external` requests via a proxy. Expects a dictionary, e.g.: - -.. 
code-block:: python3 - - LINKCHECK_PROXIES = { - "http": "http://.../", - "https": "https://.../", - } - - -LINKCHECK_TRUST_PROXY_SSL -~~~~~~~~~~~~~~~~~~~~~~~~~ - -Default: `False` - -If you are making your requests via a proxy, you can use this setting to turn off SSL verification for the proxy. - - -django-filebrowser integration ------------------------------- - -If django-filebrowser is present on your path then linkcheck will listen to the post-upload, delete and rename signals and update itself according - - -Contributing ------------- - -You can install all requirements of the development setup with the extra ``dev``: - -.. code-block:: bash - - $ python3 -m venv .venv - $ source .venv/bin/activate - $ pip install -e .[dev] - $ django-admin compilemessages --ignore=.venv # Optionally compile translation file - -If you want to make use of the flake8 and isort pre-commit hooks, enable them with: - -.. code-block:: bash - - $ pre-commit install - -Running tests -~~~~~~~~~~~~~ - -Tests can be run standalone by using the ``runtests.py`` script in linkcheck root: - -.. code-block:: bash - - $ python runtests.py - -If you want to run linkcheck tests in the context of your project, you should include ``'linkcheck.tests.sampleapp'`` in your ``INSTALLED_APPS`` setting. - -Linkcheck gives you two context managers to enable or disable listeners in your -own tests. For example: - -.. code-block:: python3 - - def test_something_without_listeners(self): - with listeners.disable_listeners(): - # Create/update here without linkcheck intervening. - -In the case you defined the ``LINKCHECK_DISABLE_LISTENERS`` setting, you can -temporarily enable it by: - -.. code-block:: python3 - - def test_something_with_listeners(self): - with listeners.enable_listeners(): - # Create/update here and see linkcheck activated. - -Translations -~~~~~~~~~~~~ - -At the moment this app is available in English, German, and French. 
-If you want to contribute translations for ``LOCALE``, run: - -.. code-block:: bash - - django-admin makemessages --locale LOCALE - -and edit the corresponding file in ``linkcheck/locale/LOCALE/LC_MESSAGES/django.po``. - -Create new release -~~~~~~~~~~~~~~~~~~ - -1. Bump version in `pyproject.toml <./pyproject.toml>`_ -2. Update `CHANGELOG <./CHANGELOG>`_ -3. Create release commit: ``git commit --message "Release vX.Y.Z"`` -4. Create git tag: ``git tag -a "X.Y.Z" -m "Release vX.Y.Z"`` -5. Push the commit and tag to the repository: ``git push && git push --tags`` -6. Build the source distribution: ``python -m build`` -7. Publish the package to PyPI: ``twine upload dist/django-linkcheck-X.Y.Z*`` diff --git a/examples/linkcheck.jpg b/examples/linkcheck.jpg deleted file mode 100644 index 2172184..0000000 Binary files a/examples/linkcheck.jpg and /dev/null differ diff --git a/examples/linklists.py b/examples/linklists.py deleted file mode 100644 index 1c0391e..0000000 --- a/examples/linklists.py +++ /dev/null @@ -1,13 +0,0 @@ -from cms.models import Page - -from linkcheck import Linklist - - -class PageLinklist(Linklist): - - model = Page - object_filter = {'active': True} - html_fields = ['content', 'extra_content'] - - -linklists = {'Pages': PageLinklist} diff --git a/index.html b/index.html new file mode 100644 index 0000000..f32976b --- /dev/null +++ b/index.html @@ -0,0 +1,119 @@ + + + + + + + andybak/django-linkcheck @ GitHub + + + + + + + Fork me on GitHub + +
+ +
+ + + + +
+ +

django-linkcheck + by andybak

+ +
+ An app that will analyze and report on links in any model that you register with it. +
+ +

http://cms.andybak.webfactional.com/media/linkcheck.jpg + +A fairly flexible app that will analyze and report on links in any model that you register with it. Links can be bare (urls or image and file fields) or embedded in HTML (linkcheck handles the parsing). It's fairly easy to override methods of the Linkcheck object should you need to do anything more complicated (like generate URLs from slug fields etc). + +The current version does all the finding and checking of links as a bulk job you can run from cron but it's fairly easy to hook up signals to your model's save and delete to keep the data updated once an initial sweep has been done. + +Another future enhancement would be the ability to automatically fix links when the related object is changed. + +This is working code but it currently comes with a few caveats: + + # It's been extracted from my homespun CMS and in some places that still shows. + # No tests :( + # Few comments or docstrings :( + # The documentation was rather a rush job :( + +Yes I'm a bad person. I'm putting this out there because doing so might inspire someone - hopefully me - to fix the above issues.

Dependencies

+

jQuery (my base admin template already links to it; if yours doesn't, add it to base_linkcheck.html) +

+

Install

+

1. Install app to somewhere on your Python path +2. Edit examples/linkcheck_config.py to include references to all your models that might contain links. +3. Add something along the lines of linklists.py to every app you referenced in linkcheck_config.py +4. Import linkcheck_config.py from your root urls. +5. Syncdb +6. Run findlinks management command or the utils.find() +7. Run checklinks management command or the utils.check() +8. Add (r'^admin/linkcheck/', include('linkcheck.urls')) to your root url config +9. View /admin/linkcheck/ from your browser +

+

License

+

New BSD License

+

Authors

+

andybak (andy@andybak.net)

+

Contact

+

Andy Baker (andy@andybak.net)

+ + +

Download

+

+ You can download this project in either + zip or + tar formats. +

+

You can also clone the project with Git + by running: +

$ git clone https://github.com/andybak/django-linkcheck
+

+ + + +
+ + + + + diff --git a/linkcheck.jpg b/linkcheck.jpg deleted file mode 100644 index 2172184..0000000 Binary files a/linkcheck.jpg and /dev/null differ diff --git a/linkcheck/__init__.py b/linkcheck/__init__.py deleted file mode 100644 index ba0949e..0000000 --- a/linkcheck/__init__.py +++ /dev/null @@ -1,216 +0,0 @@ -import threading -from html.parser import HTMLParser - -# A global lock, showing whether linkcheck is busy -update_lock = threading.Lock() - - -class Lister(HTMLParser): - - def reset(self): - HTMLParser.reset(self) - self.urls = [] - - -class URLLister(Lister): - - def __init__(self): - self.in_a = False - self.text = '' - self.url = '' - HTMLParser.__init__(self) - - def handle_starttag(self, tag, attrs): - if tag == 'a': - href = [v for k, v in attrs if k == 'href'] - if href: - self.in_a = True - self.url = href[0] - elif tag == 'img' and self.in_a: - src = [v for k, v in attrs if k == 'src'] - if src: - self.text += f' [image:{src[0]}] ' - - def handle_endtag(self, tag): - if tag == 'a' and self.in_a: - self.urls.append((self.text[:256], self.url)) - self.in_a = False - self.text = '' - self.url = '' - - def handle_data(self, data): - if self.in_a: - self.text += data - - -class ImageLister(Lister): - - def handle_starttag(self, tag, attrs): - if tag == 'img': - src = [v for k, v in attrs if k == 'src'] - if src: - self.urls.append(('', src[0])) - - -class AnchorLister(HTMLParser): - def __init__(self): - self.names = [] - HTMLParser.__init__(self) - - def reset(self): - HTMLParser.reset(self) - self.names = [] - - def handle_starttag(self, tag, attributes): - name = [v for k, v in attributes if k == 'id'] - if name: - self.names.append(name[0]) - if tag == 'a': - name = [v for k, v in attributes if k == 'name'] - if name: - self.names.append(name[0]) - - -def parse(obj, field, parser): - html = getattr(obj, field) - if html: - parser.feed(html) - parser.close() - return parser.urls - else: - return [] - - -def parse_urls(obj, field): - parser = 
URLLister() - return parse(obj, field, parser) - - -def parse_images(obj, field): - parser = ImageLister() - return parse(obj, field, parser) - - -def parse_anchors(content): - parser = AnchorLister() - if not isinstance(content, str): - content = str(content) - parser.feed(content) - parser.close() - return parser.names - - -class Linklist: - - html_fields = [] - url_fields = [] - ignore_empty = [] - image_fields = [] - - # You can override object_filter and object_exclude in a linklist class. - # Just provide a dictionary to be used as a Django lookup filter. - # Only objects that pass the filter will be queried for links. - # This doesn't affect whether an object is regarded as a valid link target. Only as a link source. - # Example usage in your linklists.py: - # object_filter = {'active': True} - Would only check active objects for links - - object_filter = None - object_exclude = None - filter_callable = None - - def __get(self, name, obj, default=None): - try: - attr = getattr(self, name) - except AttributeError: - return default - if callable(attr): - return attr(obj) - return attr - - @staticmethod - def extract_url_from_field(obj, field_name): - val = getattr(obj, field_name) - try: - try: - url = val.url # FileField and ImageField have a url property - except ValueError: # And it throws an exception for empty fields - url = '' - except AttributeError: - url = val # Assume the field returns the url directly - - return url or '' # Coerce None to '' - - def get_urls_from_field_list(self, obj, field_list): - urls = [] - for field_name in field_list: - url = self.extract_url_from_field(obj, field_name) - if field_name in self.ignore_empty and not url: - continue - urls.append((field_name, '', url)) - return urls - - def urls(self, obj): - - urls = [] - - # Look for HREFS in HTML fields - for field_name in self.html_fields: - urls += [(field_name, text, url) for text, url in parse_urls(obj, field_name)] - - # Now add in the URL fields - urls += 
self.get_urls_from_field_list(obj, self.url_fields) - - return urls - - def images(self, obj): - - urls = [] - - # Look for IMGs in HTML fields - for field_name in self.html_fields: - urls += [(field_name, text, url) for text, url in parse_images(obj, field_name)] - - # hostname_length = settings.MEDIA_URL[:-1].rfind('/') - # url[hostname_length:] - - # Now add in the image fields - urls += self.get_urls_from_field_list(obj, self.image_fields) - - return urls - - @classmethod - def objects(cls): - - objects = cls.model.objects.all() - - if cls.object_filter: - objects = objects.filter(**cls.object_filter).distinct() - if cls.object_exclude: - objects = objects.exclude(**cls.object_exclude).distinct() - if cls.filter_callable: - objects = cls.filter_callable(objects) - return objects - - def get_linklist(self, extra_filter=None): - - extra_filter = extra_filter or {} - - linklist = [] - objects = self.objects() - - if extra_filter: - objects = objects.filter(**extra_filter) - - for obj in objects: - linklist.append({ - 'object': obj, - 'urls': self.urls(obj), - 'images': self.images(obj), - }) - - return linklist - - @classmethod - def content_type(cls): - from django.contrib.contenttypes.models import ContentType - return ContentType.objects.get_for_model(cls.model) diff --git a/linkcheck/admin_blocks.py b/linkcheck/admin_blocks.py deleted file mode 100644 index e800fb3..0000000 --- a/linkcheck/admin_blocks.py +++ /dev/null @@ -1,14 +0,0 @@ -import django_admin_blocks - -from linkcheck.views import get_status_message - -"""Legacy internal helper""" - - -def notification(): - return get_status_message() - - -django_admin_blocks.register({ - 'errors': (notification,), -}) diff --git a/linkcheck/apps.py b/linkcheck/apps.py deleted file mode 100644 index 89442c0..0000000 --- a/linkcheck/apps.py +++ /dev/null @@ -1,62 +0,0 @@ -import importlib - -from django.apps import AppConfig, apps -from django.db.models.signals import post_delete - - -class 
AlreadyRegistered(Exception): - pass - - -class BaseLinkcheckConfig(AppConfig): - name = 'linkcheck' - verbose_name = "Linkcheck" - - default_auto_field = "django.db.models.BigAutoField" - - all_linklists = {} - - def ready(self): - self.build_linklists() - - def build_linklists(self): - """Autodiscovery of linkLists""" - for app in apps.get_app_configs(): - module_name = f"{app.name}.linklists" - try: - if not importlib.util.find_spec(module_name): - continue - except ModuleNotFoundError: - continue - the_module = importlib.import_module(module_name) - try: - for k in the_module.linklists.keys(): - if k in self.all_linklists.keys(): - raise AlreadyRegistered(f'The key {k} is already registered in all_linklists') - - for link_list in the_module.linklists.values(): - for link_list2 in self.all_linklists.values(): - if link_list.model == link_list2.model: - raise AlreadyRegistered(f"The LinkList {link_list} is already registered in all_linklists") - self.all_linklists.update(the_module.linklists) - except AttributeError: - pass - # Add a reference to the linklist in the model. 
This change is for internal hash link, - # But might also be useful elsewhere in the future - for key, linklist in self.all_linklists.items(): - setattr(linklist.model, '_linklist', linklist) - - -class LinkcheckConfig(BaseLinkcheckConfig): - default = True - - def ready(self): - from .linkcheck_settings import DISABLE_LISTENERS - from .listeners import register_listeners - super().ready() - - if not DISABLE_LISTENERS: - register_listeners() - - from .models import Link, link_post_delete - post_delete.connect(link_post_delete, sender=Link) diff --git a/linkcheck/build_meta.py b/linkcheck/build_meta.py deleted file mode 100644 index a75ee56..0000000 --- a/linkcheck/build_meta.py +++ /dev/null @@ -1,36 +0,0 @@ -import os -import subprocess - -from setuptools import build_meta as default -from setuptools.build_meta import * # noqa: F401, F403 - - -def compile_translation_files(): - print("Compiling translation files...") - subprocess.run(["django-admin", "compilemessages"], cwd="linkcheck") - - -def should_compile_translation_files(): - skip_translations = os.environ.get("LINKCHECK_SKIP_TRANSLATIONS") - if skip_translations and skip_translations.lower() in ("1", "true", "yes", "t", "y"): - return False - - return True - - -def build_sdist(sdist_directory, config_settings=None): - if should_compile_translation_files(): - compile_translation_files() - - return default.build_sdist(sdist_directory, config_settings) - - -def build_wheel(wheel_directory, config_settings=None, metadata_directory=None): - if should_compile_translation_files(): - compile_translation_files() - - return default.build_wheel( - wheel_directory, - config_settings=config_settings, - metadata_directory=metadata_directory, - ) diff --git a/linkcheck/cron.py b/linkcheck/cron.py deleted file mode 100644 index ec30a0b..0000000 --- a/linkcheck/cron.py +++ /dev/null @@ -1,53 +0,0 @@ -# This file works with our fork of django-cron. -# It's use is optional -# Use any means you like to run scheduled jobs. 
-# -# Note - you only need to run scheduled jobs if you want to check external links -# that may have died since the link was last edited -# -# Links are checked via signals any time a link-containing object is saved by Django - -from django_cron import WEEK, Job, cronScheduler - -from linkcheck.linkcheck_settings import ( - EXTERNAL_RECHECK_INTERVAL, - MAX_CHECKS_PER_RUN, -) -from linkcheck.utils import check_links, find_all_links - - -class RunLinkCheckFind(Job): - - run_every = WEEK - - def job(self): - find_all_links() - - -cronScheduler.register(RunLinkCheckFind) - - -class RunLinkCheckInternal(Job): - - run_every = WEEK - - def job(self): - check_links(limit=MAX_CHECKS_PER_RUN, check_external=False) - - -cronScheduler.register(RunLinkCheckInternal) - - -class RunLinkCheckExternal(Job): - - run_every = WEEK - - def job(self): - check_links( - external_recheck_interval=EXTERNAL_RECHECK_INTERVAL, - limit=MAX_CHECKS_PER_RUN, - check_internal=False, - ) - - -cronScheduler.register(RunLinkCheckExternal) diff --git a/linkcheck/dashboard.py b/linkcheck/dashboard.py deleted file mode 100644 index 721bad3..0000000 --- a/linkcheck/dashboard.py +++ /dev/null @@ -1,15 +0,0 @@ -from admin_tools.dashboard import modules -from django.urls import reverse - -from linkcheck.views import get_status_message - -linkcheck_dashboard_module = modules.LinkList( - title="Linkchecker", - pre_content=get_status_message, - children=( - {'title': 'Valid links', 'url': reverse('linkcheck_report') + '?filters=show_valid'}, - {'title': 'Broken links', 'url': reverse('linkcheck_report')}, - {'title': 'Untested links', 'url': reverse('linkcheck_report') + '?filters=show_unchecked'}, - {'title': 'Ignored links', 'url': reverse('linkcheck_report') + '?filters=ignored'}, - ) -) diff --git a/linkcheck/filebrowser.py b/linkcheck/filebrowser.py deleted file mode 100644 index fe10e6e..0000000 --- a/linkcheck/filebrowser.py +++ /dev/null @@ -1,140 +0,0 @@ -"""Integrate with django-filebrowser if 
present.""" -import logging -import os.path - -from django.conf import settings -from django.contrib import messages -from django.utils.translation import gettext as _ -from django.utils.translation import ngettext - -try: - from filebrowser.settings import DIRECTORY - from filebrowser.signals import ( - filebrowser_post_delete, - filebrowser_post_rename, - filebrowser_post_upload, - ) - FILEBROWSER_PRESENT = True -except ImportError: - FILEBROWSER_PRESENT = False - -from linkcheck.models import Url - -logger = logging.getLogger(__name__) - - -def get_relative_media_url(): - if settings.MEDIA_URL.startswith('http'): - relative_media_url = ('/'+'/'.join(settings.MEDIA_URL.split('/')[3:]))[:-1] - else: - relative_media_url = settings.MEDIA_URL - return relative_media_url - - -def handle_upload(sender, path=None, **kwargs): - logger.debug('uploaded path %s with kwargs %r', path, kwargs) - - url = os.path.join(get_relative_media_url(), kwargs['file'].url) - url_qs = Url.objects.filter(url=url).filter(status=False) - count = url_qs.count() - if count: - url_qs.update(status=True, message="Working document link") - msg = ngettext( - "Uploading {} has corrected {} broken link.", - "Uploading {} has corrected {} broken links.", - count, - ).format(url, count) - messages.success(sender, '{}: {} {}'.format( - _('Please note'), - msg, - _('See the Link Checker for more details.') - )) - - -def handle_rename(sender, path=None, **kwargs): - logger.debug('renamed path %s with kwargs %r', path, kwargs) - - def isdir(filename): - if filename.count('.'): - return False - else: - return True - - old_url = os.path.join(get_relative_media_url(), DIRECTORY, path) - new_url = os.path.join(get_relative_media_url(), DIRECTORY, path.replace(kwargs['name'], kwargs['new_name'])) - # Renaming a file will cause it's urls to become invalid - # Renaming a directory will cause the urls of all it's contents to become invalid - old_url_qs = Url.objects.filter(url=old_url).filter(status=True) - if 
isdir(kwargs['name']): - old_url_qs = Url.objects.filter(url__startswith=old_url).filter(status=True) - old_count = old_url_qs.count() - if old_count: - old_url_qs.update(status=False, message="Missing Document") - msg = ngettext( - "Renaming {} has caused {} link to break.", - "Renaming {} has caused {} links to break.", - old_count, - ).format(old_url, old_count) - messages.warning(sender, '{}: {} {}'.format( - _('Warning'), - msg, - _('Please use the Link Checker to fix them.') - )) - - # The new directory may fix some invalid links, so we also check for that - if isdir(kwargs['new_name']): - new_count = 0 - new_url_qs = Url.objects.filter(url__startswith=new_url).filter(status=False) - for url in new_url_qs: - if url.check_url(): - new_count += 1 - else: - new_url_qs = Url.objects.filter(url=new_url).filter(status=False) - new_count = new_url_qs.count() - if new_count: - new_url_qs.update(status=True, message='Working document link') - if new_count: - msg = ngettext( - "Renaming {} has corrected {} broken link.", - "Renaming {} has corrected {} broken links.", - new_count, - ).format(new_url, new_count) - messages.success(sender, '{}: {} {}'.format( - _('Please note'), - msg, - _('See the Link Checker for more details.') - )) - - -def handle_delete(sender, path=None, **kwargs): - logger.debug('deleted path %s with kwargs %r', path, kwargs) - - url = os.path.join(get_relative_media_url(), DIRECTORY, path) - url_qs = Url.objects.filter(url=url).filter(status=True) - count = url_qs.count() - if count: - url_qs.update(status=False, message="Missing Document") - msg = ngettext( - "Deleting {} has caused {} link to break.", - "Deleting {} has caused {} links to break.", - count, - ).format(url, count) - messages.warning(sender, '{}: {} {}'.format( - _('Warning'), - msg, - _('Please use the Link Checker to fix them.') - )) - - -def register_listeners(): - if FILEBROWSER_PRESENT: - filebrowser_post_upload.connect(handle_upload) - 
filebrowser_post_rename.connect(handle_rename) - filebrowser_post_delete.connect(handle_delete) - - -def unregister_listeners(): - if FILEBROWSER_PRESENT: - filebrowser_post_upload.disconnect(handle_upload) - filebrowser_post_rename.disconnect(handle_rename) - filebrowser_post_delete.disconnect(handle_delete) diff --git a/linkcheck/linkcheck_settings.py b/linkcheck/linkcheck_settings.py deleted file mode 100644 index 617aef4..0000000 --- a/linkcheck/linkcheck_settings.py +++ /dev/null @@ -1,63 +0,0 @@ -from django.conf import settings -from django.db import models - -# Used for coverage view - -DEFAULT_HTML_FIELD_CLASSES = [] -DEFAULT_IMAGE_FIELD_CLASSES = [models.ImageField] -DEFAULT_URL_FIELD_CLASSES = [models.FileField] - - -# The coverage view warns you if you use any fields that haven't been registered with Linkcheck when they should have -# Let's add a few likely candidates. You can add your own via the LINKCHECK_EXTRA_xxx_FIELD_CLASSES setting -# Pull requests welcome - -try: - from sorl.thumbnail import ImageField - DEFAULT_IMAGE_FIELD_CLASSES.append(ImageField) -except ImportError: - pass - -try: - from mcefield.custom_fields import MCEField - DEFAULT_HTML_FIELD_CLASSES.append(MCEField) -except ImportError: - pass - -try: - from select_url_field.fields import SelectURLField - DEFAULT_URL_FIELD_CLASSES.append(SelectURLField) -except ImportError: - pass - -try: - from filebrowser.fields import FileBrowseField - DEFAULT_URL_FIELD_CLASSES.append(FileBrowseField) -except ImportError: - pass - -try: - from browse_and_upload_field.fields import FileBrowseAndUploadField - DEFAULT_URL_FIELD_CLASSES.append(FileBrowseAndUploadField) -except ImportError: - pass - - -HTML_FIELD_CLASSES = getattr(settings, 'LINKCHECK_EXTRA_HTML_FIELD_CLASSES', []) + DEFAULT_HTML_FIELD_CLASSES -IMAGE_FIELD_CLASSES = getattr(settings, 'LINKCHECK_EXTRA_IMAGE_FIELD_CLASSES', []) + DEFAULT_IMAGE_FIELD_CLASSES -URL_FIELD_CLASSES = getattr(settings, 'LINKCHECK_EXTRA_URL_FIELD_CLASSES', []) + 
DEFAULT_URL_FIELD_CLASSES - -# Main (non-coverage related) settings - -EXTERNAL_RECHECK_INTERVAL = getattr(settings, 'LINKCHECK_EXTERNAL_RECHECK_INTERVAL', 10080) # 1 week -EXTERNAL_REGEX_STRING = getattr(settings, 'LINKCHECK_EXTERNAL_REGEX_STRING', r'^https?://') -LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT = getattr(settings, 'LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT', 10) -MAX_CHECKS_PER_RUN = getattr(settings, 'LINKCHECK_MAX_CHECKS_PER_RUN', -1) -MAX_URL_LENGTH = getattr(settings, 'LINKCHECK_MAX_URL_LENGTH', 255) -MEDIA_PREFIX = getattr(settings, 'LINKCHECK_MEDIA_PREFIX', settings.MEDIA_URL) -RESULTS_PER_PAGE = getattr(settings, 'LINKCHECK_RESULTS_PER_PAGE', 500) -SITE_DOMAINS = getattr(settings, 'LINKCHECK_SITE_DOMAINS', []) -DISABLE_LISTENERS = getattr(settings, 'LINKCHECK_DISABLE_LISTENERS', False) -TOLERATE_BROKEN_ANCHOR = getattr(settings, 'LINKCHECK_TOLERATE_BROKEN_ANCHOR', True) -PROXIES = getattr(settings, 'LINKCHECK_PROXIES', {}) -TRUST_PROXY_SSL = getattr(settings, 'LINKCHECK_TRUST_PROXY_SSL', False) diff --git a/linkcheck/listeners.py b/linkcheck/listeners.py deleted file mode 100644 index 0642aeb..0000000 --- a/linkcheck/listeners.py +++ /dev/null @@ -1,243 +0,0 @@ -import logging -import sys -import time -from contextlib import contextmanager -from queue import Empty, LifoQueue -from threading import Thread - -from django.apps import apps -from django.db.models import signals as model_signals - -from linkcheck.models import Link, Url - -from . 
import filebrowser, update_lock -from .linkcheck_settings import MAX_URL_LENGTH - -logger = logging.getLogger(__name__) - - -tasks_queue = LifoQueue() -worker_running = False -tests_running = len(sys.argv) > 1 and sys.argv[1] == 'test' or sys.argv[0].endswith('runtests.py') - - -def linkcheck_worker(block=True): - global worker_running # noqa - while tasks_queue.not_empty: - try: - task = tasks_queue.get(block=block) - except Empty: - break - # An error in any task should not stop the worker from continuing with the queue - try: - task['target'](*task['args'], **task['kwargs']) - except Exception as e: - logger.exception( - "%s while running %s with args=%r and kwargs=%r: %s", - type(e).__name__, - task['target'].__name__, - task['args'], - task['kwargs'], - e - ) - tasks_queue.task_done() - worker_running = False - - -def start_worker(): - global worker_running # noqa - if worker_running is False: - worker_running = True - t = Thread(target=linkcheck_worker) - t.daemon = True - t.start() - - -def check_instance_links(sender, instance, **kwargs): - """ - When an object is saved: - new Link/Urls are created, checked - - When an object is modified: - new link/urls are created, checked - existing link/urls are checked - Removed links are deleted - """ - linklist_cls = sender._linklist - - def do_check_instance_links(sender, instance, wait=False): - # On some installations, this wait time might be enough for the - # thread transaction to account for the object change (GH #41). - # A candidate for the future post_commit signal. 
- - global worker_running # noqa - - if wait: - time.sleep(0.1) - with update_lock: - content_type = linklist_cls.content_type() - new_links = [] - old_links = Link.objects.filter(content_type=content_type, object_id=instance.pk) - - linklists = linklist_cls().get_linklist(extra_filter={'pk': instance.pk}) - - if not linklists: - # This object is no longer watched by linkcheck according to object_filter - links = [] - else: - linklist = linklists[0] - links = linklist['urls']+linklist['images'] - - for link in links: - # url structure = (field, link text, url) - url = link[2] - if url.startswith('#'): - url = instance.get_absolute_url() + url - - if len(url) > MAX_URL_LENGTH: - # We cannot handle url longer than MAX_URL_LENGTH at the moment - logger.warning('URL exceeding max length will be skipped: %s', url) - continue - - u, created = Url.objects.get_or_create(url=url) - l, created = Link.objects.get_or_create( - url=u, field=link[0], text=link[1], content_type=content_type, object_id=instance.pk - ) - new_links.append(l.id) - u.check_url() - - gone_links = old_links.exclude(id__in=new_links) - gone_links.delete() - - # Don't run in a separate thread if we are running tests - if tests_running: - do_check_instance_links(sender, instance) - else: - tasks_queue.put({ - 'target': do_check_instance_links, - 'args': (sender, instance, True), - 'kwargs': {} - }) - start_worker() - - -def delete_instance_links(sender, instance, **kwargs): - """ - Delete all links belonging to a model instance when that instance is deleted - """ - linklist_cls = sender._linklist - content_type = linklist_cls.content_type() - old_links = Link.objects.filter(content_type=content_type, object_id=instance.pk) - old_links.delete() - - -def instance_pre_save(sender, instance, raw=False, **kwargs): - if instance._state.adding or not instance.pk or raw: - # Ignore unsaved instances or raw imports - return - current_url = instance.get_absolute_url() - previous_url = 
sender.objects.get(pk=instance.pk).get_absolute_url() - setattr(instance, '__previous_url', previous_url) - if previous_url == current_url: - return - else: - if previous_url is not None: - old_urls = Url.objects.filter(url__startswith=previous_url) - old_urls.update(status=False, message='Broken internal link') - if current_url is not None: - new_urls = Url.objects.filter(url__startswith=current_url) - # Mark these urls' status as False, so that post_save will check them - new_urls.update(status=False, message='Should be checked now!') - - -def instance_post_save(sender, instance, **kwargs): - # Ignore raw imports - if kwargs.get('raw'): - return - - def do_instance_post_save(sender, instance, **kwargs): - current_url = instance.get_absolute_url() - previous_url = getattr(instance, '__previous_url', None) - # We assume returning None from get_absolute_url means that this instance doesn't have a URL - # Not sure if we should do the same for '' as this could refer to '/' - if current_url is not None and current_url != previous_url: - linklist_cls = sender._linklist - active = linklist_cls.objects().filter(pk=instance.pk).count() - - if kwargs['created'] or (not active): - new_urls = Url.objects.filter(url__startswith=current_url) - else: - new_urls = Url.objects.filter(status=False).filter(url__startswith=current_url) - - if new_urls: - for url in new_urls: - url.check_url() - - if tests_running: - do_instance_post_save(sender, instance, **kwargs) - else: - tasks_queue.put({ - 'target': do_instance_post_save, - 'args': (sender, instance), - 'kwargs': kwargs - }) - start_worker() - - -def instance_pre_delete(sender, instance, **kwargs): - instance.linkcheck_deleting = True - deleted_url = instance.get_absolute_url() - if deleted_url: - old_urls = Url.objects.filter(url__startswith=deleted_url).exclude(status=False) - if old_urls: - old_urls.update(status=False, message='Broken internal link') - - -def register_listeners(): - # 1. 
register listeners for the objects that contain Links - for linklist_name, linklist_cls in apps.get_app_config('linkcheck').all_linklists.items(): - model_signals.post_save.connect(check_instance_links, sender=linklist_cls.model) - model_signals.post_delete.connect(delete_instance_links, sender=linklist_cls.model) - - # 2. register listeners for the objects that are targets of Links, - # only when get_absolute_url() is defined for the model - if getattr(linklist_cls.model, 'get_absolute_url', None): - model_signals.pre_save.connect(instance_pre_save, sender=linklist_cls.model) - model_signals.post_save.connect(instance_post_save, sender=linklist_cls.model) - model_signals.pre_delete.connect(instance_pre_delete, sender=linklist_cls.model) - - filebrowser.register_listeners() - - -def unregister_listeners(): - # 1. register listeners for the objects that contain Links - for linklist_name, linklist_cls in apps.get_app_config('linkcheck').all_linklists.items(): - model_signals.post_save.disconnect(check_instance_links, sender=linklist_cls.model) - model_signals.post_delete.disconnect(delete_instance_links, sender=linklist_cls.model) - - # 2. 
register listeners for the objects that are targets of Links, - # only when get_absolute_url() is defined for the model - if getattr(linklist_cls.model, 'get_absolute_url', None): - model_signals.pre_save.disconnect(instance_pre_save, sender=linklist_cls.model) - model_signals.post_save.disconnect(instance_post_save, sender=linklist_cls.model) - model_signals.pre_delete.disconnect(instance_pre_delete, sender=linklist_cls.model) - - filebrowser.unregister_listeners() - - -@contextmanager -def enable_listeners(*args, **kwargs): - register_listeners() - try: - yield - finally: - unregister_listeners() - - -@contextmanager -def disable_listeners(*args, **kwargs): - unregister_listeners() - try: - yield - finally: - register_listeners() diff --git a/linkcheck/locale/de/LC_MESSAGES/django.po b/linkcheck/locale/de/LC_MESSAGES/django.po deleted file mode 100644 index 3e1bc89..0000000 --- a/linkcheck/locale/de/LC_MESSAGES/django.po +++ /dev/null @@ -1,272 +0,0 @@ -msgid "" -msgstr "" -"Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2023-02-28 23:01+0100\n" -"Language: German\n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=UTF-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=2; plural=(n != 1);\n" - -#: filebrowser.py:43 -msgid "Uploading {} has corrected {} broken link." -msgid_plural "Uploading {} has corrected {} broken links." -msgstr[0] "Das Hochladen von {} hat {} fehlerhaften Link korrigiert." -msgstr[1] "Das Hochladen von {} hat {} fehlerhafte Links korrigiert." - -#: filebrowser.py:48 filebrowser.py:103 -msgid "Please note" -msgstr "Bitte beachten Sie" - -#: filebrowser.py:50 filebrowser.py:105 -msgid "See the Link Checker for more details." -msgstr "Weitere Einzelheiten finden Sie im Link Checker." - -#: filebrowser.py:74 -msgid "Renaming {} has caused {} link to break." -msgid_plural "Renaming {} has caused {} links to break." -msgstr[0] "" -"Das Umbenennen von {} hat dazu geführt, dass {} Link nicht mehr funktioniert." 
-msgstr[1] "" -"Das Umbenennen von {} hat dazu geführt, dass {} Links nicht mehr " -"funktionieren." - -#: filebrowser.py:79 filebrowser.py:123 -msgid "Warning" -msgstr "Warnung" - -#: filebrowser.py:81 filebrowser.py:125 -msgid "Please use the Link Checker to fix them." -msgstr "Bitte verwenden Sie den Link Checker, um sie zu korrigieren." - -#: filebrowser.py:98 -msgid "Renaming {} has corrected {} broken link." -msgid_plural "Renaming {} has corrected {} broken links." -msgstr[0] "Das Umbenennen von {} hat {} fehlerhaften Link korrigiert." -msgstr[1] "Das Umbenennen von {} hat {} fehlerhafte Links korrigiert." - -#: filebrowser.py:118 -msgid "Deleting {} has caused {} link to break." -msgid_plural "Deleting {} has caused {} links to break." -msgstr[0] "" -"Das Löschen von {} hat dazu geführt, dass {} Link nicht mehr funktioniert." -msgstr[1] "" -"Das Löschen von {} hat dazu geführt, dass {} Links nicht mehr funktionieren." - -#: models.py:118 -msgid "Working empty anchor" -msgstr "Funktionierender leerer Anker" - -#: models.py:120 -msgid "Anchor could not be checked" -msgstr "Anker konnte nicht geprüft werden" - -#: models.py:122 -msgid "Broken anchor" -msgstr "Ungültiger Anker" - -#: models.py:123 -msgid "Working anchor" -msgstr "Funktionierender Anker" - -#: models.py:130 -msgid "Insecure link" -msgstr "Unsicherer Link" - -#: models.py:132 -msgid "SSL certificate could not be checked" -msgstr "SSL-Zertifikat konnte nicht überprüft werden" - -#: models.py:134 -msgid "Broken SSL certificate" -msgstr "Fehlerhaftes SSL-Zertifikat" - -#: models.py:135 -msgid "Valid SSL certificate" -msgstr "Valides SSL-Zertifikat" - -#: models.py:140 -msgid "URL Not Yet Checked" -msgstr "URL noch nicht geprüft" - -#: models.py:142 -msgid "Empty link" -msgstr "Leerer Link" - -#: models.py:144 -msgid "Invalid URL" -msgstr "Ungültige URL" - -#: models.py:146 -msgid "Email link" -msgstr "Email-Link" - -#: models.py:146 models.py:148 models.py:150 -msgid "not automatically checked" 
-msgstr "nicht automatisch geprüft" - -#: models.py:148 -msgid "Phone number link" -msgstr "Telefonnummern-Link" - -#: models.py:150 -msgid "Anchor link" -msgstr "Anker-Link" - -#: models.py:152 -msgid "Working file link" -msgstr "Funktionierender Datei-Link" - -#: models.py:152 -msgid "Missing file" -msgstr "Fehlende Datei" - -#: models.py:156 -msgid "Working external link" -msgstr "Funktionierender externer Link" - -#: models.py:156 -msgid "Working internal link" -msgstr "Funktionierender interner Link" - -#: models.py:160 -msgid "Working permanent redirect" -msgstr "Funktionierende dauerhafte Weiterleitung" - -#: models.py:160 -msgid "Working temporary redirect" -msgstr "Funktionierende temporäre Weiterleitung" - -#: models.py:162 -msgid "Broken permanent redirect" -msgstr "Fehlerhafte dauerhafte Weiterleitung" - -#: models.py:162 -msgid "Broken temporary redirect" -msgstr "Fehlerhafte temporäre Weiterleitung" - -#: models.py:163 -msgid "Broken external link" -msgstr "Fehlerhafter externer Link" - -#: models.py:163 -msgid "Broken internal link" -msgstr "Fehlerhafter interner Link" - -#: templates/linkcheck/base_linkcheck.html:5 -#: templates/linkcheck/base_linkcheck.html:11 -#: templates/linkcheck/base_linkcheck.html:17 -#: templates/linkcheck/coverage.html:14 -msgid "Link Checker" -msgstr "" - -#: templates/linkcheck/base_linkcheck.html:10 -#: templates/linkcheck/coverage.html:13 -msgid "Home" -msgstr "" - -#: templates/linkcheck/coverage.html:8 templates/linkcheck/coverage.html:15 -msgid "Coverage" -msgstr "Abdeckung" - -#: templates/linkcheck/coverage.html:22 -msgid "Model" -msgstr "Datenbank-Modell" - -#: templates/linkcheck/coverage.html:23 -msgid "Covered" -msgstr "Überprüft" - -#: templates/linkcheck/coverage.html:24 -msgid "Suggested config" -msgstr "Empfohlene Konfiguration" - -#: templates/linkcheck/coverage.html:30 -msgid "Yes,No" -msgstr "Ja,Nein" - -#: templates/linkcheck/paginator.html:7 -msgid "First" -msgstr "Erste" - -#: 
templates/linkcheck/paginator.html:11 templates/linkcheck/paginator.html:13 -msgid "Previous" -msgstr "Vorherige" - -#: templates/linkcheck/paginator.html:17 -#, python-format -msgid "Page %(current)s of %(max)s" -msgstr "Seite %(current)s von %(max)s" - -#: templates/linkcheck/paginator.html:21 templates/linkcheck/paginator.html:23 -msgid "Next" -msgstr "Nächste" - -#: templates/linkcheck/paginator.html:27 templates/linkcheck/paginator.html:29 -msgid "Last" -msgstr "Letze" - -#: templates/linkcheck/report.html:125 -msgid "Show" -msgstr "Anzeigen" - -#: templates/linkcheck/report.html:126 views.py:83 -msgid "Valid links" -msgstr "Gültige Links" - -#: templates/linkcheck/report.html:127 views.py:92 -msgid "Broken links" -msgstr "Ungültige Links" - -#: templates/linkcheck/report.html:128 views.py:86 -msgid "Untested links" -msgstr "Ungetestete Links" - -#: templates/linkcheck/report.html:129 views.py:89 -msgid "Ignored links" -msgstr "Ignorierte Links" - -#: templates/linkcheck/report.html:140 -#, python-format -msgid "View %(content_type_name)s" -msgstr "%(content_type_name)s anzeigen" - -#: templates/linkcheck/report.html:141 -#, python-format -msgid "Edit %(content_type_name)s" -msgstr "%(content_type_name)s bearbeiten" - -#: templates/linkcheck/report.html:143 -msgid "Destination" -msgstr "Ziel" - -#: templates/linkcheck/report.html:144 -msgid "Linked Text" -msgstr "Link-Text" - -#: templates/linkcheck/report.html:145 -msgid "Field to edit" -msgstr "Zu bearbeitendes Feld" - -#: templates/linkcheck/report.html:146 -msgid "Status" -msgstr "" - -#: templates/linkcheck/report.html:157 -msgid "Recheck" -msgstr "Erneut prüfen" - -#: templates/linkcheck/report.html:164 -msgid "Ignore" -msgstr "Ignorieren" - -#: templates/linkcheck/report.html:166 -msgid "Unignore" -msgstr "Nicht ignorieren" - -#: templates/linkcheck/report.html:173 -msgid "Redirects to" -msgstr "Leitet weiter zu" - -#~ msgid "Link to section on same page" -#~ msgstr "Link zu Abschnitt auf derselben 
Seite" diff --git a/linkcheck/locale/fr/LC_MESSAGES/django.po b/linkcheck/locale/fr/LC_MESSAGES/django.po deleted file mode 100644 index 5c7e7c0..0000000 --- a/linkcheck/locale/fr/LC_MESSAGES/django.po +++ /dev/null @@ -1,175 +0,0 @@ -# This file is distributed under the same license as the django-linkcheck package. -# Paroz Claude , 2023 -# -msgid "" -msgstr "" -"Project-Id-Version: django-linkcheck master\n" -"Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2023-02-05 11:05+0100\n" -"PO-Revision-Date: 2023-02-05 12:00+0100\n" -"Last-Translator: Paroz Claude \n" -"Language-Team: French\n" -"Language: fr\n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=UTF-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=2; plural=(n > 1);\n" - -#: linkcheck/filebrowser.py:43 -msgid "Uploading {} has corrected {} broken link." -msgid_plural "Uploading {} has corrected {} broken links." -msgstr[0] "L’envoi de {} a corrigé {} lien brisé." -msgstr[1] "L’envoi de {} a corrigé {} liens brisés." - -#: linkcheck/filebrowser.py:48 linkcheck/filebrowser.py:103 -msgid "Please note" -msgstr "Prenez note" - -#: linkcheck/filebrowser.py:50 linkcheck/filebrowser.py:105 -msgid "See the Link Checker for more details." -msgstr "Consultez le Contrôleur de liens pour plus de détails." - -#: linkcheck/filebrowser.py:74 -msgid "Renaming {} has caused {} link to break." -msgid_plural "Renaming {} has caused {} links to break." -msgstr[0] "Le renommage de {} a brisé {} lien." -msgstr[1] "Le renommage de {} a brisé {} liens." - -#: linkcheck/filebrowser.py:79 linkcheck/filebrowser.py:123 -msgid "Warning" -msgstr "Avertissement" - -#: linkcheck/filebrowser.py:81 linkcheck/filebrowser.py:125 -msgid "Please use the Link Checker to fix them." -msgstr "Veuillez utiliser le Contrôleur de liens pour les corriger." - -#: linkcheck/filebrowser.py:98 -msgid "Renaming {} has corrected {} broken link." -msgid_plural "Renaming {} has corrected {} broken links." 
-msgstr[0] "Le renommage de {} a corrigé {} lien brisé." -msgstr[1] "Le renommage de {} a corrigé {} liens brisés." - -#: linkcheck/filebrowser.py:118 -msgid "Deleting {} has caused {} link to break." -msgid_plural "Deleting {} has caused {} links to break." -msgstr[0] "La suppression de {} a brisé {} lien." -msgstr[1] "La suppression de {} a brisé {} liens." - -#: linkcheck/templates/linkcheck/base_linkcheck.html:5 -#: linkcheck/templates/linkcheck/base_linkcheck.html:11 -#: linkcheck/templates/linkcheck/base_linkcheck.html:17 -#: linkcheck/templates/linkcheck/coverage.html:14 -msgid "Link Checker" -msgstr "Contrôleur de liens" - -#: linkcheck/templates/linkcheck/base_linkcheck.html:10 -#: linkcheck/templates/linkcheck/coverage.html:13 -msgid "Home" -msgstr "Accueil" - -#: linkcheck/templates/linkcheck/coverage.html:8 -#: linkcheck/templates/linkcheck/coverage.html:15 -msgid "Coverage" -msgstr "Couverture" - -#: linkcheck/templates/linkcheck/coverage.html:22 -msgid "Model" -msgstr "Modèle" - -#: linkcheck/templates/linkcheck/coverage.html:23 -msgid "Covered" -msgstr "Couvert" - -#: linkcheck/templates/linkcheck/coverage.html:24 -msgid "Suggested config" -msgstr "Configuration suggérée" - -#: linkcheck/templates/linkcheck/coverage.html:30 -msgid "Yes,No" -msgstr "Oui,Non" - -#: linkcheck/templates/linkcheck/paginator.html:7 -msgid "First" -msgstr "Première" - -#: linkcheck/templates/linkcheck/paginator.html:11 -#: linkcheck/templates/linkcheck/paginator.html:13 -msgid "Previous" -msgstr "Précédente" - -#: linkcheck/templates/linkcheck/paginator.html:17 -#, python-format -msgid "Page %(current)s of %(max)s" -msgstr "Page %(current)s sur %(max)s" - -#: linkcheck/templates/linkcheck/paginator.html:21 -#: linkcheck/templates/linkcheck/paginator.html:23 -msgid "Next" -msgstr "Suivante" - -#: linkcheck/templates/linkcheck/paginator.html:27 -#: linkcheck/templates/linkcheck/paginator.html:29 -msgid "Last" -msgstr "Dernière" - -#: 
linkcheck/templates/linkcheck/report.html:125 -msgid "Show" -msgstr "Afficher" - -#: linkcheck/templates/linkcheck/report.html:126 linkcheck/views.py:83 -msgid "Valid links" -msgstr "Liens valables" - -#: linkcheck/templates/linkcheck/report.html:127 linkcheck/views.py:92 -msgid "Broken links" -msgstr "Liens brisés" - -#: linkcheck/templates/linkcheck/report.html:128 linkcheck/views.py:86 -msgid "Untested links" -msgstr "Liens non testés" - -#: linkcheck/templates/linkcheck/report.html:129 linkcheck/views.py:89 -msgid "Ignored links" -msgstr "Liens ignorés" - -#: linkcheck/templates/linkcheck/report.html:140 -#, python-format -msgid "View %(content_type_name)s" -msgstr "Voir %(content_type_name)s" - -#: linkcheck/templates/linkcheck/report.html:141 -#, python-format -msgid "Edit %(content_type_name)s" -msgstr "Modifier %(content_type_name)s" - -#: linkcheck/templates/linkcheck/report.html:143 -msgid "Destination" -msgstr "Destination" - -#: linkcheck/templates/linkcheck/report.html:144 -msgid "Linked Text" -msgstr "Texte de lien" - -#: linkcheck/templates/linkcheck/report.html:145 -msgid "Field to edit" -msgstr "Champ à modifier" - -#: linkcheck/templates/linkcheck/report.html:146 -msgid "Status" -msgstr "Statut" - -#: linkcheck/templates/linkcheck/report.html:157 -msgid "Recheck" -msgstr "Recontrôler" - -#: linkcheck/templates/linkcheck/report.html:164 -msgid "Ignore" -msgstr "Ignorer" - -#: linkcheck/templates/linkcheck/report.html:166 -msgid "Unignore" -msgstr "Ne plus ignorer" - -#: linkcheck/templates/linkcheck/report.html:173 -msgid "Redirects to" -msgstr "Redirige vers" diff --git a/linkcheck/management/__init__.py b/linkcheck/management/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/linkcheck/management/commands/__init__.py b/linkcheck/management/commands/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/linkcheck/management/commands/checkexternal.py b/linkcheck/management/commands/checkexternal.py deleted 
file mode 100644 index 9ee1f25..0000000 --- a/linkcheck/management/commands/checkexternal.py +++ /dev/null @@ -1,35 +0,0 @@ -from django.core.management.base import BaseCommand - -from linkcheck.linkcheck_settings import ( - EXTERNAL_RECHECK_INTERVAL, - MAX_CHECKS_PER_RUN, -) -from linkcheck.utils import check_links - - -class Command(BaseCommand): - - help = 'Check and record external link status' - - def add_arguments(self, parser): - parser.add_argument( - '-e', '--externalinterval', type=int, - help='Specifies the length of time in minutes until external links are rechecked. ' - 'Defaults to linkcheck_config setting' - ) - parser.add_argument( - '-l', '--limit', type=int, - help='Specifies the maximum number (int) of links to be checked. ' - 'Defaults to linkcheck_config setting. Value less than 1 will check all' - ) - - def handle(self, *args, **options): - externalinterval = options['externalinterval'] or EXTERNAL_RECHECK_INTERVAL - limit = options.get('limit', None) or MAX_CHECKS_PER_RUN - - self.stdout.write(f"Checking all external links that haven't been tested for {externalinterval} minutes.") - if limit != -1: - self.stdout.write(f"Will run maximum of {limit} checks this run.") - - check_count = check_links(external_recheck_interval=externalinterval, limit=limit, check_internal=False) - return f"{check_count} external URLs have been checked." 
diff --git a/linkcheck/management/commands/checkinternal.py b/linkcheck/management/commands/checkinternal.py deleted file mode 100644 index a6d0989..0000000 --- a/linkcheck/management/commands/checkinternal.py +++ /dev/null @@ -1,25 +0,0 @@ -from django.core.management.base import BaseCommand - -from linkcheck.linkcheck_settings import MAX_CHECKS_PER_RUN -from linkcheck.utils import check_links - - -class Command(BaseCommand): - - help = 'Check and record internal link status' - - def add_arguments(self, parser): - parser.add_argument( - '-l', '--limit', type=int, - help='Specifies the maximum number (int) of links to be checked. ' - 'Defaults to linkcheck_config setting. Value less than 1 will check all') - - def handle(self, *args, **options): - limit = options.get('limit', None) or MAX_CHECKS_PER_RUN - - self.stdout.write("Checking all internal links.") - if limit != -1: - self.stdout.write(f"Will run maximum of {limit} checks this run.") - - check_count = check_links(limit=limit, check_external=False) - return f"{check_count} internal URLs have been checked." diff --git a/linkcheck/management/commands/checklinks.py b/linkcheck/management/commands/checklinks.py deleted file mode 100644 index b7678e1..0000000 --- a/linkcheck/management/commands/checklinks.py +++ /dev/null @@ -1,36 +0,0 @@ -from django.core.management.base import BaseCommand - -from linkcheck.linkcheck_settings import ( - EXTERNAL_RECHECK_INTERVAL, - MAX_CHECKS_PER_RUN, -) -from linkcheck.utils import check_links - - -class Command(BaseCommand): - - help = 'Check and record internal and external link status' - - def add_arguments(self, parser): - parser.add_argument( - '-e', '--externalinterval', type=int, - help='Specifies the length of time in minutes until external links are rechecked. ' - 'Defaults to linkcheck_config setting' - ) - parser.add_argument( - '-l', '--limit', type=int, - help='Specifies the maximum number (int) of links to be checked. ' - 'Defaults to linkcheck_config setting. 
Value less than 1 will check all' - ) - - def handle(self, *args, **options): - externalinterval = options['externalinterval'] or EXTERNAL_RECHECK_INTERVAL - limit = options['limit'] or MAX_CHECKS_PER_RUN - - self.stdout.write(f"Checking all links that haven't been tested for {externalinterval} minutes.") - if limit != -1: - self.stdout.write(f"Will run maximum of {limit} checks this run.") - - internal_checked = check_links(limit=limit, check_external=False) - external_checked = check_links(external_recheck_interval=externalinterval, limit=limit, check_internal=False) - return f"{internal_checked} internal URLs and {external_checked} external URLs have been checked." diff --git a/linkcheck/management/commands/findlinks.py b/linkcheck/management/commands/findlinks.py deleted file mode 100644 index eb6ce73..0000000 --- a/linkcheck/management/commands/findlinks.py +++ /dev/null @@ -1,20 +0,0 @@ -from django.core.management.base import BaseCommand - -from linkcheck.utils import find_all_links - - -class Command(BaseCommand): - - help = ( - "Goes through all models registered with Linkcheck, records any new links found" - "and removes all outdated links" - ) - - def handle(self, *args, **options): - self.stdout.write("Updating all links...") - return "\n".join( - [ - f"{model.capitalize()}: {', '.join([f'{count} {label}' for label, count in data.items()])}" - for model, data in find_all_links().items() - ] - ) diff --git a/linkcheck/management/commands/linkcheck_suggest_config.py b/linkcheck/management/commands/linkcheck_suggest_config.py deleted file mode 100644 index 7b955af..0000000 --- a/linkcheck/management/commands/linkcheck_suggest_config.py +++ /dev/null @@ -1,35 +0,0 @@ -from django.apps import apps -from django.core.management.base import BaseCommand, CommandError -from django.utils.termcolors import make_style - -from linkcheck.utils import get_coverage_data, get_suggested_linklist_config - - -class Command(BaseCommand): - - cyan = 
staticmethod(make_style(fg='cyan')) - - help = 'Go through all models and check whether they are registered with linkcheck' - - def add_arguments(self, parser): - parser.add_argument( - '--model', - help="Generate the suggested config for this model", - ) - - def handle(self, *args, model, **options): - if model: - try: - model_class = apps.get_model(model) - except Exception as e: - raise CommandError( - f'Model "{model}" does not exist.' - ) from e - self.stdout.write(get_suggested_linklist_config(model_class)) - else: - covered, uncovered = get_coverage_data() - self.stdout.write('All covered models:\n') - self.stdout.write(', '.join(map(self.cyan, covered))) - for model, suggested_config in uncovered: - self.stdout.write(f'\nSuggested config for model {model}:') - self.stdout.write(self.cyan(suggested_config)) diff --git a/linkcheck/management/commands/unignore_links.py b/linkcheck/management/commands/unignore_links.py deleted file mode 100644 index f2c29ed..0000000 --- a/linkcheck/management/commands/unignore_links.py +++ /dev/null @@ -1,12 +0,0 @@ -from django.core.management.base import BaseCommand - -from linkcheck.utils import unignore - - -class Command(BaseCommand): - - help = "Updates the `ignore` status of all links to `False`" - - def execute(self, *args, **options): - print("Unignoring all links") - unignore() diff --git a/linkcheck/migrations/0001_initial.py b/linkcheck/migrations/0001_initial.py deleted file mode 100644 index dc213f5..0000000 --- a/linkcheck/migrations/0001_initial.py +++ /dev/null @@ -1,38 +0,0 @@ -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('contenttypes', '0001_initial'), - ] - - operations = [ - migrations.CreateModel( - name='Link', - fields=[ - ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), - ('object_id', models.PositiveIntegerField()), - ('field', models.CharField(max_length=128)), - ('text', 
models.CharField(default='', max_length=256)), - ('ignore', models.BooleanField(default=False)), - ('content_type', models.ForeignKey(to='contenttypes.ContentType', on_delete=models.CASCADE)), - ], - ), - migrations.CreateModel( - name='Url', - fields=[ - ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), - ('url', models.CharField(unique=True, max_length=255)), - ('last_checked', models.DateTimeField(null=True, blank=True)), - ('status', models.BooleanField(null=True)), - ('message', models.CharField(max_length=1024, null=True, blank=True)), - ('still_exists', models.BooleanField(default=False)), - ], - ), - migrations.AddField( - model_name='link', - name='url', - field=models.ForeignKey(related_name='links', to='linkcheck.Url', on_delete=models.CASCADE), - ), - ] diff --git a/linkcheck/migrations/0002_url_redirect_to.py b/linkcheck/migrations/0002_url_redirect_to.py deleted file mode 100644 index 33238c4..0000000 --- a/linkcheck/migrations/0002_url_redirect_to.py +++ /dev/null @@ -1,16 +0,0 @@ -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('linkcheck', '0001_initial'), - ] - - operations = [ - migrations.AddField( - model_name='url', - name='redirect_to', - field=models.CharField(default='', max_length=255), - ), - ] diff --git a/linkcheck/migrations/0003_redirect_to_as_textfield.py b/linkcheck/migrations/0003_redirect_to_as_textfield.py deleted file mode 100644 index 891bd21..0000000 --- a/linkcheck/migrations/0003_redirect_to_as_textfield.py +++ /dev/null @@ -1,16 +0,0 @@ -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('linkcheck', '0002_url_redirect_to'), - ] - - operations = [ - migrations.AlterField( - model_name='url', - name='redirect_to', - field=models.TextField(blank=True), - ), - ] diff --git a/linkcheck/migrations/0004_remove_url_still_exists.py 
b/linkcheck/migrations/0004_remove_url_still_exists.py deleted file mode 100644 index 147ff5d..0000000 --- a/linkcheck/migrations/0004_remove_url_still_exists.py +++ /dev/null @@ -1,15 +0,0 @@ -from django.db import migrations - - -class Migration(migrations.Migration): - - dependencies = [ - ('linkcheck', '0003_redirect_to_as_textfield'), - ] - - operations = [ - migrations.RemoveField( - model_name='url', - name='still_exists', - ), - ] diff --git a/linkcheck/migrations/0005_default_big_auto_field.py b/linkcheck/migrations/0005_default_big_auto_field.py deleted file mode 100644 index 046c0a8..0000000 --- a/linkcheck/migrations/0005_default_big_auto_field.py +++ /dev/null @@ -1,25 +0,0 @@ -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('linkcheck', '0004_remove_url_still_exists'), - ] - - operations = [ - migrations.AlterField( - model_name='link', - name='id', - field=models.BigAutoField( - auto_created=True, primary_key=True, serialize=False, verbose_name='ID' - ), - ), - migrations.AlterField( - model_name='url', - name='id', - field=models.BigAutoField( - auto_created=True, primary_key=True, serialize=False, verbose_name='ID' - ), - ), - ] diff --git a/linkcheck/migrations/0006_url_add_status_code.py b/linkcheck/migrations/0006_url_add_status_code.py deleted file mode 100644 index a6dbd55..0000000 --- a/linkcheck/migrations/0006_url_add_status_code.py +++ /dev/null @@ -1,21 +0,0 @@ -from django.db import migrations, models - -from linkcheck.models import STATUS_CODE_CHOICES - - -class Migration(migrations.Migration): - - dependencies = [ - ("linkcheck", "0005_default_big_auto_field"), - ] - - operations = [ - migrations.AddField( - model_name="url", - name="status_code", - field=models.IntegerField( - choices=STATUS_CODE_CHOICES, - null=True, - ), - ), - ] diff --git a/linkcheck/migrations/0007_url_add_redirect_status_code.py b/linkcheck/migrations/0007_url_add_redirect_status_code.py deleted file 
mode 100644 index 2567adb..0000000 --- a/linkcheck/migrations/0007_url_add_redirect_status_code.py +++ /dev/null @@ -1,21 +0,0 @@ -from django.db import migrations, models - -from linkcheck.models import STATUS_CODE_CHOICES - - -class Migration(migrations.Migration): - - dependencies = [ - ("linkcheck", "0006_url_add_status_code"), - ] - - operations = [ - migrations.AddField( - model_name="url", - name="redirect_status_code", - field=models.IntegerField( - choices=STATUS_CODE_CHOICES, - null=True, - ), - ), - ] diff --git a/linkcheck/migrations/0008_url_add_anchor_status.py b/linkcheck/migrations/0008_url_add_anchor_status.py deleted file mode 100644 index fc964a5..0000000 --- a/linkcheck/migrations/0008_url_add_anchor_status.py +++ /dev/null @@ -1,16 +0,0 @@ -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ("linkcheck", "0007_url_add_redirect_status_code"), - ] - - operations = [ - migrations.AddField( - model_name="url", - name="anchor_status", - field=models.BooleanField(null=True), - ), - ] diff --git a/linkcheck/migrations/0009_url_add_ssl_status.py b/linkcheck/migrations/0009_url_add_ssl_status.py deleted file mode 100644 index e4ac56b..0000000 --- a/linkcheck/migrations/0009_url_add_ssl_status.py +++ /dev/null @@ -1,16 +0,0 @@ -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ("linkcheck", "0008_url_add_anchor_status"), - ] - - operations = [ - migrations.AddField( - model_name="url", - name="ssl_status", - field=models.BooleanField(null=True), - ), - ] diff --git a/linkcheck/migrations/0010_url_add_error_message.py b/linkcheck/migrations/0010_url_add_error_message.py deleted file mode 100644 index 92580f3..0000000 --- a/linkcheck/migrations/0010_url_add_error_message.py +++ /dev/null @@ -1,16 +0,0 @@ -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('linkcheck', 
'0009_url_add_ssl_status'), - ] - - operations = [ - migrations.AddField( - model_name='url', - name='error_message', - field=models.CharField(blank=True, default='', max_length=1024), - ), - ] diff --git a/linkcheck/migrations/0011_link_add_content_object_index.py b/linkcheck/migrations/0011_link_add_content_object_index.py deleted file mode 100644 index 39d636d..0000000 --- a/linkcheck/migrations/0011_link_add_content_object_index.py +++ /dev/null @@ -1,18 +0,0 @@ -# Generated by Django 5.1.3 on 2024-11-25 18:00 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('contenttypes', '0002_remove_content_type_name'), - ('linkcheck', '0010_url_add_error_message'), - ] - - operations = [ - migrations.AddIndex( - model_name='link', - index=models.Index(fields=['content_type', 'object_id'], name='content_type_and_object_id'), - ), - ] diff --git a/linkcheck/migrations/__init__.py b/linkcheck/migrations/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/linkcheck/models.py b/linkcheck/models.py deleted file mode 100644 index d7c189f..0000000 --- a/linkcheck/models.py +++ /dev/null @@ -1,616 +0,0 @@ -import logging -import os.path -import re -from datetime import timedelta -from http import HTTPStatus -from urllib.parse import unquote, urlparse - -import requests -from django.conf import settings -from django.contrib.contenttypes.fields import GenericForeignKey -from django.contrib.contenttypes.models import ContentType -from django.db import models -from django.test.client import Client -from django.test.utils import modify_settings -from django.utils.encoding import iri_to_uri -from django.utils.functional import cached_property -from django.utils.timezone import now -from django.utils.translation import gettext as _ -from requests.exceptions import ConnectionError, ReadTimeout - -try: - from reversion.revisions import revision_context_manager - - USE_REVERSION = True -except ImportError: - 
USE_REVERSION = False - -from .linkcheck_settings import ( - EXTERNAL_RECHECK_INTERVAL, - EXTERNAL_REGEX_STRING, - LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT, - MAX_URL_LENGTH, - MEDIA_PREFIX, - PROXIES, - SITE_DOMAINS, - TOLERATE_BROKEN_ANCHOR, - TRUST_PROXY_SSL, -) - -logger = logging.getLogger(__name__) - - -EXTERNAL_REGEX = re.compile(EXTERNAL_REGEX_STRING) - - -def html_decode(s): - """ - Returns the ASCII decoded version of the given HTML string. This does - NOT remove normal HTML tags like

<p>. - """ - html_codes = (("'", "&#39;"), ('"', "&quot;"), (">", "&gt;"), ("<", "&lt;"), ("&", "&amp;")) - for code in html_codes: - s = s.replace(code[1], code[0]) - return s - - -STATUS_CODE_CHOICES = [(s.value, f"{s.value} {s.phrase}") for s in HTTPStatus] -DEFAULT_USER_AGENT = f"{settings.SITE_DOMAIN} Linkchecker" -FALLBACK_USER_AGENT = ( - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36" -) - - -class Url(models.Model): - """ - Represents a distinct URL found somewhere in the models registered with linkcheck - A single Url can have multiple Links associated with it. - """ - - # See http://www.boutell.com/newfaq/misc/urllength.html - url = models.CharField(max_length=MAX_URL_LENGTH, unique=True) - last_checked = models.DateTimeField(blank=True, null=True) - anchor_status = models.BooleanField(null=True) - ssl_status = models.BooleanField(null=True) - status = models.BooleanField(null=True) - status_code = models.IntegerField(choices=STATUS_CODE_CHOICES, null=True) - redirect_status_code = models.IntegerField(choices=STATUS_CODE_CHOICES, null=True) - message = models.CharField(max_length=1024, blank=True, null=True) - error_message = models.CharField(max_length=1024, default="", blank=True) - redirect_to = models.TextField(blank=True) - - @property - def redirect_ok(self): - return self.redirect_status_code < 300 if self.redirect_status_code else None - - @property - def type(self): - if self.external: - return "external" - if self.url.startswith("mailto:"): - return "mailto" - if self.url.startswith("tel:"): - return "phone" - elif self.internal_url == "": - return "empty" - elif self.internal_url.startswith("#"): - return "anchor" - elif self.internal_url.startswith(MEDIA_PREFIX): - return "file" - elif self.internal_url.startswith("/"): - return "internal" - else: - return "invalid" - - @property - def has_anchor(self): - return "#" in self.url - - @property - def anchor(self): - return self.url.split("#")[1] if
self.has_anchor else None - - @property - def anchor_message(self): - if not self.has_anchor or not self.last_checked: - return "" - if self.anchor == "": - return _("Working empty anchor") - if self.anchor_status is None: - return _("Anchor could not be checked") - elif self.anchor_status is False: - return _("Broken anchor") - return _("Working anchor") - - @property - def ssl_message(self): - if self.internal: - return "" - if self.external_url.startswith("http://"): - return _("Insecure link") - if self.ssl_status is None: - return _("SSL certificate could not be checked") - elif self.ssl_status is False: - return _("Broken SSL certificate") - return _("Valid SSL certificate") - - @property - def get_message(self): - if not self.last_checked and self.status is None: - return _("URL Not Yet Checked") - elif self.type == "empty": - return _("Empty link") - elif self.type == "invalid": - return _("Invalid URL") - elif self.type == "mailto": - return "{} ({})".format(_("Email link"), _("not automatically checked")) - elif self.type == "phone": - return "{} ({})".format(_("Phone number link"), _("not automatically checked")) - elif self.type == "anchor": - return "{} ({})".format(_("Anchor link"), _("not automatically checked")) - elif self.type == "file": - return _("Working file link") if self.status else _("Missing file") - elif not self.status_code: - return self.error_message - elif self.status_code < 300: - return _("Working external link") if self.external else _("Working internal link") - elif self.status_code < 400: - permanent = self.status_code in [HTTPStatus.MOVED_PERMANENTLY, HTTPStatus.PERMANENT_REDIRECT] - if self.redirect_ok: - return _("Working permanent redirect") if permanent else _("Working temporary redirect") - else: - return _("Broken permanent redirect") if permanent else _("Broken temporary redirect") - return _("Broken external link") if self.external else _("Broken internal link") - - @property - def colour(self): - if not 
self.last_checked: - return "blue" - elif self.status is True: - return "green" - else: - return "red" - - def __str__(self): - return self.url - - def __repr__(self): - return f"" - - @cached_property - def internal_url(self): - """ - Remove current domain from URLs as the test client chokes when trying to test them during a page save - They shouldn't generally exist but occasionally slip through - If settings.SITE_DOMAINS isn't set then use settings.SITE_DOMAIN - but also check for variants: example.org, www.example.org, test.example.org - - In case the URLs is external, `None` is returned. - """ - - # If the URL is not external, directly return it without processing - if not EXTERNAL_REGEX.match(self.url): - return self.url - - # May receive transformation before being checked - prepared_url = self.url - - internal_exceptions = [] - if SITE_DOMAINS: # If the setting is present - internal_exceptions = SITE_DOMAINS - elif getattr(settings, "SITE_DOMAIN", None): # try using SITE_DOMAIN - root_domain = settings.SITE_DOMAIN - if root_domain.startswith("www."): - root_domain = root_domain[4:] - elif root_domain.startswith("test."): - root_domain = root_domain[5:] - internal_exceptions = [ - f"{protocol}://{sub}{root_domain}" for sub in ["", "www.", "test."] for protocol in ["http", "https"] - ] - - for ex in internal_exceptions: - if ex and prepared_url.startswith(ex): - prepared_url = prepared_url.replace(ex, "", 1) - - # If the URL is still external, return `None` - if EXTERNAL_REGEX.match(prepared_url): - return None - - logger.debug("Internal URL: %s", prepared_url) - return prepared_url - - @cached_property - def external_url(self): - """ - Prepare an external URL to be checked with requests: - - Remove hash anchors - - Ensure correct encoding - """ - # If the URL is internal, return `None` - if self.internal: - return None - - # Encode path and query and remove anchor fragment - parsed = urlparse(self.url) -
external_url = parsed._replace( - path=iri_to_uri(parsed.path), query=iri_to_uri(parsed.query), fragment="" - ).geturl() - - logger.debug("External URL: %s", external_url) - return external_url - - @property - def internal(self): - """ - Check whether this URL is internal - """ - return self.internal_url is not None - - @property - def external(self): - """ - Check whether this URL is external - """ - return not self.internal - - def reset_for_check(self): - """ - Reset all fields which depend on the status after checking a URL. - This is done to ensure that results from the last check do not remain if the fields are not overwritten. - """ - # Reset all database fields - self.anchor_status = None - self.status = None - self.status_code = None - self.redirect_status_code = None - self.ssl_status = None - self.error_message = "" - self.message = "" - - def check_url(self, check_internal=True, check_external=True, external_recheck_interval=EXTERNAL_RECHECK_INTERVAL): - """ - Return: - * True if the link was checked and found valid - * False if the link was checked and found invalid - * None if the link was not checked - """ - - if check_internal and self.internal: - return self.check_internal() - elif check_external and self.external: - return self.check_external(external_recheck_interval) - else: - return None - - def check_internal(self): - """ - Check an internal URL - """ - if not self.internal: - logger.info("URL %r is not internal", self) - return None - - logger.debug("checking internal link: %s", self.internal_url) - - # Reset all fields in case they were already set - self.reset_for_check() - - from linkcheck.utils import LinkCheckHandler - - if self.type == "empty": - self.status = False - self.message = "Empty link" - - elif self.type == "mailto": - self.message = "Email link (not automatically checked)" - - elif self.type == "phone": - self.message = "Phone number (not automatically checked)" - - elif self.type == "anchor":
- self.message = "Link to within the same page (not automatically checked)" - - elif self.type == "file": - # TODO: Assumes a direct mapping from media url to local filesystem path. - # This will break quite easily for alternate setups - path = settings.MEDIA_ROOT + unquote(self.internal_url)[len(MEDIA_PREFIX) - 1:] - decoded_path = html_decode(path) - self.status = os.path.exists(path) or os.path.exists(decoded_path) - self.message = "Working file link" if self.status else "Missing Document" - - elif self.type == "internal": - old_prepend_setting = settings.PREPEND_WWW - settings.PREPEND_WWW = False - c = Client() - c.handler = LinkCheckHandler() - with modify_settings(ALLOWED_HOSTS={"append": "testserver"}): - response = c.get(self.internal_url) - self.status_code = response.status_code - if response.status_code < 300: - self.message = "Working internal link" - self.status = True - elif response.status_code < 400: - initial_location = response.get("Location") - redirect_type = "permanent" if response.status_code == 301 else "temporary" - with modify_settings(ALLOWED_HOSTS={"append": "testserver"}): - response = c.get(self.internal_url, follow=True) - if response.redirect_chain: - self.redirect_to, _ = response.redirect_chain[-1] - else: - self.redirect_to = initial_location - self.redirect_status_code = response.status_code - self.status = response.status_code < 300 - redirect_result = "Working" if self.status else "Broken" - self.message = f"{redirect_result} {redirect_type} redirect" - else: - self.status = False - self.message = "Broken internal link" - - # Check the anchor (if it exists) - self.check_anchor(response.content) - - settings.PREPEND_WWW = old_prepend_setting - else: - self.status = False - self.message = "Invalid URL" - - if USE_REVERSION: - # using test client will clear the RevisionContextManager stack. 
- revision_context_manager.start() - - self.last_checked = now() - self.save() - return self.status - - def check_external(self, external_recheck_interval=EXTERNAL_RECHECK_INTERVAL): - """ - Check an external URL - """ - if not self.external: - logger.info("URL %r is not external", self) - return None - - logger.info("checking external link: %s", self.url) - external_recheck_datetime = now() - timedelta(minutes=external_recheck_interval) - - if self.last_checked and (self.last_checked > external_recheck_datetime): - logger.debug( - "URL was last checked in the last %s minutes, so not checking it again", external_recheck_interval - ) - return self.status - - # Reset all fields in case they were already set - self.reset_for_check() - - request_params = { - "allow_redirects": True, - "headers": {"User-Agent": DEFAULT_USER_AGENT}, - "timeout": LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT, - "verify": True, - } - if PROXIES: - request_params["verify"] = not TRUST_PROXY_SSL - request_params["proxies"] = PROXIES - - try: - try: - # At first try a HEAD request - fetch = requests.head - response = fetch(self.external_url, **request_params) - # If no exceptions occur, the SSL certificate is valid - if self.external_url.startswith("https://"): - self.ssl_status = True - except ConnectionError as e: - # This error could also be caused by an incomplete root certificate bundle, - # so let's retry without verifying the certificate - if "unable to get local issuer certificate" in str(e): - request_params["verify"] = False - response = fetch(self.external_url, **request_params) - else: - # Re-raise exception if it's definitely not a false positive - raise - # If HEAD is not allowed, let's try with GET - if response.status_code in [HTTPStatus.BAD_REQUEST, HTTPStatus.METHOD_NOT_ALLOWED]: - logger.debug("HEAD is not allowed, retry with GET") - fetch = requests.get - response = fetch(self.external_url, **request_params) - # If access is denied, possibly the user agent is blocked - if 
response.status_code == HTTPStatus.FORBIDDEN: - logger.debug("Forbidden, retry with different user agent") - request_params["headers"] = {"User-Agent": FALLBACK_USER_AGENT} - response = fetch(self.external_url, **request_params) - # If URL contains hash anchor and is a valid HTML document, let's repeat with GET - elif ( - self.has_anchor - and response.ok - and fetch == requests.head - and "text/html" in response.headers.get("content-type") - ): - logger.debug("Retrieve content for anchor check") - fetch = requests.get - response = fetch(self.external_url, **request_params) - except ReadTimeout: - self.status = False - self.message = "Other Error: The read operation timed out" - self.error_message = "The read operation timed out" - except ConnectionError as e: - self.status = False - self.message = self.error_message = format_connection_error(e) - if "SSLError" in str(e): - self.ssl_status = False - except Exception as e: - self.status = False - self.message = f"Other Error: {e}" - self.error_message = str(e) - else: - self.status = response.status_code < 300 - self.message = f"{response.status_code} {response.reason}" - logger.debug("Response message: %s", self.message) - - # If initial response was a redirect, return the initial return code - if response.history: - logger.debug("Redirect history: %r", response.history) - if response.ok: - self.message = f"{response.history[0].status_code} {response.history[0].reason}" - self.redirect_to = response.url - self.redirect_status_code = response.status_code - self.status_code = response.history[0].status_code - else: - self.status_code = response.status_code - - # Check the anchor (if it exists) - if fetch == requests.get: - self.check_anchor(response.text) - if not request_params["verify"]: - self.message += ", SSL certificate could not be verified" - - # When a rate limit was hit or the server returned an internal error, do not update - # the last_checked date so the result is not cached for EXTERNAL_RECHECK_INTERVAL 
minutes - if not self.status_code or self.status_code != HTTPStatus.TOO_MANY_REQUESTS and self.status_code < 500: - self.last_checked = now() - self.save() - return self.status - - def check_anchor(self, html): - from linkcheck import parse_anchors - - scope = "internal" if self.internal else "external" - - # Only check when the URL contains an anchor - if self.has_anchor: - # Empty fragment '#' is always valid - if not self.anchor: - self.anchor_status = True - self.message += f", working {scope} hash anchor" - else: - try: - names = parse_anchors(html) - # Known possible errors include: AssertionError, NotImplementedError, UnicodeDecodeError - except Exception as e: - logger.debug("%s while parsing anchors: %s", type(e).__name__, e) - self.message += ", failed to parse HTML for anchor" - if not TOLERATE_BROKEN_ANCHOR: - self.status = False - else: - if self.anchor in names: - self.anchor_status = True - self.message += f", working {scope} hash anchor" - else: - self.anchor_status = False - self.message += f", broken {scope} hash anchor" - if not TOLERATE_BROKEN_ANCHOR: - self.status = False - return self.anchor_status, self.anchor_message - - -class Link(models.Model): - """ - A Link represents a specific URL in a specific field in a specific model - It can be come from a single field such as a URLField or a field containing multiple links - Such as a HTML or Rich Text field. 
- Multiple Links can reference a single Url - """ - - content_type = models.ForeignKey(ContentType, on_delete=models.CASCADE) - object_id = models.PositiveIntegerField() - content_object = GenericForeignKey("content_type", "object_id") - field = models.CharField(max_length=128) - url = models.ForeignKey(Url, related_name="links", on_delete=models.CASCADE) - text = models.CharField(max_length=256, default="") - ignore = models.BooleanField(default=False) - - class Meta: - indexes = [ - models.Index(fields=["content_type", "object_id"], name="content_type_and_object_id"), - ] - - @property - def display_url(http://github.com/self): - # when page /test/ has a anchor link to /test/#anchor, we display it - # as "#anchor" rather than "/test/#anchor" - if self.url.url.count("#") and hasattr(self.content_object, "get_absolute_url"): - url_part, anchor_part = self.url.url.split("#") - absolute_url = self.content_object.get_absolute_url() - if url_part == absolute_url: - return "#" + anchor_part - return self.url.url - - def __str__(self): - return f"{self.url.url} ({self.content_object})" - - def __repr__(self): - return f"" - - -def link_post_delete(sender, instance, **kwargs): - try: - # url.delete() => link.delete() => link_post_delete - # in this case link.url is already deleted from db, so we need a try here. 
- url = instance.url - count = url.links.all().count() - if count == 0: - logger.debug("This was the last link for %r, so deleting it", url) - url.delete() - except Url.DoesNotExist: - pass - - -def format_connection_error(e): - """ - Helper function to provide better readable output of connection errors - """ - # If the exception message is wrapped in an "HTTPSConnectionPool", only give the underlying cause - reason = re.search(r"\(Caused by ([a-zA-Z]+\(.+\))\)", str(e)) - if not reason: - return f"Connection Error: {e}" - reason = reason[1] - # If the underlying cause is a new connection error, provide additional formatting - if reason.startswith("NewConnectionError"): - return format_new_connection_error(reason) - # If the underlying cause is a name resolution error, provide additional formatting - if reason.startswith("NameResolutionError"): - return format_name_resolution_error(reason) - # If the underlying cause is an SSL error, provide additional formatting - if reason.startswith("SSLError"): - return format_ssl_error(reason) - return f"Connection Error: {reason}" - - -def format_new_connection_error(reason): - """ - Helper function to provide better readable output of new connection errors thrown by urllib3 - """ - connection_reason = re.search( - r"NewConnectionError\(': (.+)'\)", - reason, - ) - if connection_reason: - return f"New Connection Error: {connection_reason[1]}" - return reason - - -def format_name_resolution_error(reason): - """ - Helper function to provide better readable output of name resolution errors thrown by urllib3 - """ - resolution_reason = re.search( - r"NameResolutionError\([\"']: (.+)[\"']\)", - reason, - ) - if resolution_reason: - return f"Name Resolution Error: {resolution_reason[1]}" - return reason - - -def format_ssl_error(reason): - """ - Helper function to provide better readable output of SSL errors thrown by urllib3 - """ - ssl_reason = re.search(r"SSLError\([a-zA-Z]+\((.+)\)\)", reason) - if ssl_reason: - # If the 
reason lies withing the ssl c library, hide additional debug output - ssl_c_reason = re.search(r"1, '\[SSL: [A-Z\d_]+\] (.+) \(_ssl\.c:\d+\)'", ssl_reason[1]) - if ssl_c_reason: - return f"SSL Error: {ssl_c_reason[1]}" - return f"SSL Error: {ssl_reason[1]}" - return reason diff --git a/linkcheck/templates/linkcheck/base_linkcheck.html b/linkcheck/templates/linkcheck/base_linkcheck.html deleted file mode 100644 index 03bdcd8..0000000 --- a/linkcheck/templates/linkcheck/base_linkcheck.html +++ /dev/null @@ -1,23 +0,0 @@ -{% extends "admin/change_list.html" %} -{% load i18n %} - -{% block title %} - {% translate "Link Checker" %} {{ block.super }} -{% endblock %} - -{% block breadcrumbs %} -

-{% endblock %} - -{% block content %} -
-

{% translate "Link Checker" %}

-
- {% block innercontent %} - {% endblock %} -
-
-{% endblock %} diff --git a/linkcheck/templates/linkcheck/paginator.html b/linkcheck/templates/linkcheck/paginator.html deleted file mode 100644 index d48eb0d..0000000 --- a/linkcheck/templates/linkcheck/paginator.html +++ /dev/null @@ -1,32 +0,0 @@ -{% load i18n %} -
- - {% if pages.number > 1 %} - < First - {% else %} - < {% translate "First" %} - {% endif %} - - {% if pages.has_previous %} - << {% translate "Previous" %} - {% else %} - << {% translate "Previous" %} - {% endif %} - - - {% blocktrans with current=pages.number max=pages.paginator.num_pages %}Page {{ current }} of {{ max }}{% endblocktrans %} - - - {% if pages.has_next %} - {% translate "Next" %} >> - {% else %} - {% translate "Next" %} >> - {% endif %} - - {% if pages.number != pages.paginator.num_pages %} - {% translate "Last" %} > - {% else %} - {% translate "Last" %} > - {% endif %} - -
diff --git a/linkcheck/templates/linkcheck/report.html b/linkcheck/templates/linkcheck/report.html deleted file mode 100644 index 1734170..0000000 --- a/linkcheck/templates/linkcheck/report.html +++ /dev/null @@ -1,190 +0,0 @@ -{% extends "linkcheck/base_linkcheck.html" %} -{% load i18n %} -{% load linkcheck_model_tags %} -{% block extrahead %} -{{ block.super }} - - - -{% endblock %} - -{% block innercontent %} - -
- {% translate "Show" %}:   - {% if filter == 'show_valid' %}{% translate "Valid links" %}{% else %}{% translate "Valid links" %}{% endif %}   - {% if filter == 'show_invalid' %}{% translate "Broken links" %}{% else %}{% translate "Broken links" %}{% endif %}   - {% if filter == 'show_unchecked' %}{% translate "Untested links" %}{% else %}{% translate "Untested links" %}{% endif %}   - {% if filter == 'ignored' %}{% translate "Ignored links" %}{% else %}{% translate "Ignored links" %}{% endif %} - ({{ ignored_count }}) -
-
- - {% if content_types_list %} - {% for content_type in content_types_list %} - - -

{{content_type.content_type|get_verbose_name_plural}}

- {% for object in content_type.object_list %} -
-

{{report_type}} in '{{object.object}}'

   - {% blocktrans with content_type_name=content_type.content_type.name %}View {{ content_type_name }}{% endblocktrans %}   - {% if object.admin_url %}{% blocktrans with content_type_name=content_type.content_type.name %}Edit {{ content_type_name }}{% endblocktrans %}{% endif %} - - - - - - - - {% for link in object.link_list %} - - - - - - - - - {% if link.url.redirect_to %} - - {% endif %} - {% endfor %} - -
- {% endfor %} -
- {% endfor %} - {% else %} -

No results

- {% endif %} - {% csrf_token %} - {% if content_types_list %} - {% include "linkcheck/paginator.html" %} - {% endif %} -{% endblock %} diff --git a/linkcheck/templatetags/__init__.py b/linkcheck/templatetags/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/linkcheck/templatetags/linkcheck_model_tags.py b/linkcheck/templatetags/linkcheck_model_tags.py deleted file mode 100644 index d9f91b7..0000000 --- a/linkcheck/templatetags/linkcheck_model_tags.py +++ /dev/null @@ -1,11 +0,0 @@ -from django import template - -register = template.Library() - - -@register.filter -def get_verbose_name_plural(content_type): - """ - Returns verbose_name_plural for a content type. - """ - return content_type.model_class()._meta.verbose_name_plural.title() diff --git a/linkcheck/tests/__init__.py b/linkcheck/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/linkcheck/tests/media/found b/linkcheck/tests/media/found deleted file mode 100644 index e69de29..0000000 diff --git a/linkcheck/tests/sampleapp/__init__.py b/linkcheck/tests/sampleapp/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/linkcheck/tests/sampleapp/fixture.json b/linkcheck/tests/sampleapp/fixture.json deleted file mode 100644 index 5eed736..0000000 --- a/linkcheck/tests/sampleapp/fixture.json +++ /dev/null @@ -1,17 +0,0 @@ -[ - { - "model": "sampleapp.Page", - "pk": 1, - "fields": { - "book": 1 - } - }, - { - "model": "sampleapp.Book", - "pk": 1, - "fields": { - "title": "My Title", - "description": "My description" - } - } -] diff --git a/linkcheck/tests/sampleapp/linklists.py b/linkcheck/tests/sampleapp/linklists.py deleted file mode 100644 index e5d35ab..0000000 --- a/linkcheck/tests/sampleapp/linklists.py +++ /dev/null @@ -1,42 +0,0 @@ -from django.db.models import OuterRef, Subquery - -from linkcheck import Linklist -from linkcheck.tests.sampleapp.models import Author, Book, Journal, Page - - -class BookLinklist(Linklist): - """ Class to let 
linkcheck app discover fields containing links """ - model = Book - object_filter = {} - html_fields = ['description'] - - -class PageLinklist(Linklist): - """ Class to let linkcheck app discover fields containing links """ - model = Page - - -class AuthorLinklist(Linklist): - """ Class to let linkcheck app discover fields containing links """ - model = Author - object_filter = {} - url_fields = ['website'] - - -class JournalLinklist(Linklist): - """ Class to let linkcheck app discover fields containing links """ - model = Journal - html_fields = ['description'] - - @classmethod - def filter_callable(cls, objects): - latest = Journal.objects.filter(title=OuterRef('title')).order_by('-version') - return objects.filter(version=Subquery(latest.values('version')[:1])) - - -linklists = { - 'Books': BookLinklist, - 'Pages': PageLinklist, - 'Authors': AuthorLinklist, - 'Journals': JournalLinklist, -} diff --git a/linkcheck/tests/sampleapp/models.py b/linkcheck/tests/sampleapp/models.py deleted file mode 100644 index ebe102f..0000000 --- a/linkcheck/tests/sampleapp/models.py +++ /dev/null @@ -1,36 +0,0 @@ -from django.db import models - - -class Book(models.Model): - title = models.CharField(max_length=50) - description = models.TextField() - - def get_absolute_url(http://github.com/self): - return f"/book/{self.id}/" - - -class Page(models.Model): - book = models.ForeignKey(Book, on_delete=models.CASCADE) - - def get_absolute_url(http://github.com/self): - return f"/book/{self.book.id}/{self.id}" - - -class Author(models.Model): - # This model has purposefully no get_absolute_url - name = models.CharField(max_length=50) - website = models.URLField(blank=True) - - -class Journal(models.Model): - title = models.CharField(max_length=50) - description = models.TextField() - version = models.PositiveIntegerField(default=0) - - -class UncoveredModel(models.Model): - book = models.ForeignKey(Book, on_delete=models.CASCADE) - website = 
models.URLField(blank=True) - - def get_absolute_url(http://github.com/self): - return f'/uncovered/{self.id}' diff --git a/linkcheck/tests/sampleapp/views.py b/linkcheck/tests/sampleapp/views.py deleted file mode 100644 index dc05271..0000000 --- a/linkcheck/tests/sampleapp/views.py +++ /dev/null @@ -1,54 +0,0 @@ -import time - -from django.core.exceptions import PermissionDenied -from django.http import ( - HttpResponse, - HttpResponsePermanentRedirect, - HttpResponseRedirect, -) - - -def http_response(request, code): - return HttpResponse("", status=int(code)) - - -def http_response_get_only(request, code): - status = int(code) if request.method == 'HEAD' else 200 - return HttpResponse("", status=status) - - -def http_block_user_agent(request, block_head=False): - if block_head and request.method == 'HEAD': - return HttpResponse('', status=405) - if 'Linkchecker' in request.headers.get('User-Agent', ''): - raise PermissionDenied() - return HttpResponse('') - - -def http_redirect(request, code): - return HttpResponseRedirect("/http/200/", status=int(code)) - - -def http_redirect_to_404(request): - return HttpResponsePermanentRedirect("/http/404/") - - -def timeout(request): - time.sleep(2) - return HttpResponse("") - - -def http_response_with_anchor(request): - return HttpResponse("

Anchor

") - - -def http_redirect_to_anchor(request): - return HttpResponseRedirect("/http/anchor/") - - -def static_video(request): - return HttpResponse(b'', content_type='video/mp4') - - -def static_video_forged_content_type(request): - return HttpResponse(b': " - "Failed to resolve 'name-resolution-error.example.com' ([Errno -2] Name or service not known)\"))" - ) - mocked_url = 'https://name-resolution-error.example.com/' - mocker.register_uri('HEAD', mocked_url, exc=exc), - uv = Url(url=mocked_url) - uv.check_url() - formatted_message = ( - "Name Resolution Error: Failed to resolve 'name-resolution-error.example.com' " - "([Errno -2] Name or service not known)" - ) - self.assertEqual(uv.message, formatted_message) - self.assertEqual(uv.get_message, formatted_message) - self.assertEqual(uv.error_message, formatted_message) - self.assertEqual(uv.status, False) - self.assertEqual(uv.anchor_message, '') - self.assertEqual(uv.ssl_status, None) - self.assertEqual(uv.ssl_message, 'SSL certificate could not be checked') - self.assertEqual(uv.get_status_code_display(), None) - self.assertEqual(uv.get_redirect_status_code_display(), None) - self.assertEqual(uv.redirect_to, '') - self.assertEqual(uv.type, 'external') - - def test_external_check_200_utf8(self): - uv = Url(url=f"{self.live_server_url}/http/200/r%C3%BCckmeldung/") - uv.check_url() - self.assertEqual(uv.message, '200 OK') - self.assertEqual(uv.get_message, 'Working external link') - self.assertEqual(uv.error_message, '') - self.assertEqual(uv.status, True) - self.assertEqual(uv.anchor_message, '') - self.assertEqual(uv.ssl_status, None) - self.assertEqual(uv.ssl_message, 'Insecure link') - self.assertEqual(uv.get_status_code_display(), '200 OK') - self.assertEqual(uv.get_redirect_status_code_display(), None) - self.assertEqual(uv.redirect_to, '') - self.assertEqual(uv.type, 'external') - - def test_external_check_200_utf8_not_encoded(self): - uv = Url(url=f"{self.live_server_url}/http/200/rückmeldung/") - 
uv.check_url() - self.assertEqual(uv.message, '200 OK') - self.assertEqual(uv.get_message, 'Working external link') - self.assertEqual(uv.error_message, '') - self.assertEqual(uv.status, True) - self.assertEqual(uv.anchor_message, '') - self.assertEqual(uv.ssl_status, None) - self.assertEqual(uv.ssl_message, 'Insecure link') - self.assertEqual(uv.get_status_code_display(), '200 OK') - self.assertEqual(uv.get_redirect_status_code_display(), None) - self.assertEqual(uv.redirect_to, '') - self.assertEqual(uv.type, 'external') - - @requests_mock.Mocker() - def test_external_check_200_utf8_domain(self, mocker): - mocker.register_uri('HEAD', 'https://xn--utf8-test--z5a0txc.example.com/', reason='OK'), - uv = Url(url='https://xn--utf8-test--z5a0txc.example.com/') - uv.check_url() - self.assertEqual(uv.message, '200 OK') - self.assertEqual(uv.get_message, 'Working external link') - self.assertEqual(uv.error_message, '') - self.assertEqual(uv.status, True) - self.assertEqual(uv.anchor_message, '') - self.assertEqual(uv.ssl_status, True) - self.assertEqual(uv.ssl_message, 'Valid SSL certificate') - self.assertEqual(uv.get_status_code_display(), '200 OK') - self.assertEqual(uv.get_redirect_status_code_display(), None) - self.assertEqual(uv.redirect_to, '') - self.assertEqual(uv.type, 'external') - - @requests_mock.Mocker() - def test_external_check_200_punycode_domain(self, mocker): - punycode_domain = 'https://xn--utf8-test--z5a0txc.example.com/' - mocker.register_uri('HEAD', punycode_domain, reason='OK'), - uv = Url(url=punycode_domain) - uv.check_url() - self.assertEqual(uv.message, '200 OK') - self.assertEqual(uv.get_message, 'Working external link') - self.assertEqual(uv.error_message, '') - self.assertEqual(uv.status, True) - self.assertEqual(uv.anchor_message, '') - self.assertEqual(uv.ssl_status, True) - self.assertEqual(uv.ssl_message, 'Valid SSL certificate') - self.assertEqual(uv.get_status_code_display(), '200 OK') - 
self.assertEqual(uv.get_redirect_status_code_display(), None) - self.assertEqual(uv.redirect_to, '') - self.assertEqual(uv.type, 'external') - - def test_external_check_301(self): - uv = Url(url=f"{self.live_server_url}/http/301/") - uv.check_url() - self.assertEqual(uv.message, '301 Moved Permanently') - self.assertEqual(uv.get_message, 'Broken permanent redirect') - self.assertEqual(uv.error_message, '') - self.assertEqual(uv.status, False) - self.assertEqual(uv.anchor_message, '') - self.assertEqual(uv.ssl_status, None) - self.assertEqual(uv.ssl_message, 'Insecure link') - self.assertEqual(uv.get_status_code_display(), '301 Moved Permanently') - self.assertEqual(uv.get_redirect_status_code_display(), None) - self.assertEqual(uv.redirect_to, '') - self.assertEqual(uv.type, 'external') - - def test_external_check_301_followed(self): - uv = Url(url=f"{self.live_server_url}/http/redirect/301/") - uv.check_url() - self.assertEqual(uv.message, '301 Moved Permanently') - self.assertEqual(uv.get_message, 'Working permanent redirect') - self.assertEqual(uv.error_message, '') - self.assertEqual(uv.status, True) - self.assertEqual(uv.anchor_message, '') - self.assertEqual(uv.ssl_status, None) - self.assertEqual(uv.ssl_message, 'Insecure link') - self.assertEqual(uv.get_status_code_display(), '301 Moved Permanently') - self.assertEqual(uv.get_redirect_status_code_display(), '200 OK') - self.assertEqual(uv.redirect_to, f'{self.live_server_url}/http/200/') - self.assertEqual(uv.type, 'external') - - def test_external_check_302_followed(self): - uv = Url(url=f"{self.live_server_url}/http/redirect/302/") - uv.check_url() - self.assertEqual(uv.message, '302 Found') - self.assertEqual(uv.get_message, 'Working temporary redirect') - self.assertEqual(uv.error_message, '') - self.assertEqual(uv.status, True) - self.assertEqual(uv.anchor_message, '') - self.assertEqual(uv.ssl_status, None) - self.assertEqual(uv.ssl_message, 'Insecure link') - 
self.assertEqual(uv.get_status_code_display(), '302 Found') - self.assertEqual(uv.get_redirect_status_code_display(), '200 OK') - self.assertEqual(uv.redirect_to, f'{self.live_server_url}/http/200/') - self.assertEqual(uv.type, 'external') - - def test_external_check_404(self): - uv = Url(url=f"{self.live_server_url}/whatever/") - uv.check_url() - self.assertEqual(uv.message, '404 Not Found') - self.assertEqual(uv.get_message, 'Broken external link') - self.assertEqual(uv.error_message, '') - self.assertEqual(uv.status, False) - self.assertEqual(uv.anchor_message, '') - self.assertEqual(uv.ssl_status, None) - self.assertEqual(uv.ssl_message, 'Insecure link') - self.assertEqual(uv.get_status_code_display(), '404 Not Found') - self.assertEqual(uv.get_redirect_status_code_display(), None) - self.assertEqual(uv.redirect_to, '') - self.assertEqual(uv.type, 'external') - - def test_external_check_redirect_final_404(self): - uv = Url(url=f"{self.live_server_url}/http/redirect_to_404/") - uv.check_url() - self.assertEqual(uv.message, '404 Not Found') - self.assertEqual(uv.get_message, 'Broken permanent redirect') - self.assertEqual(uv.error_message, '') - self.assertEqual(uv.status, False) - self.assertEqual(uv.anchor_message, '') - self.assertEqual(uv.ssl_status, None) - self.assertEqual(uv.ssl_message, 'Insecure link') - self.assertEqual(uv.get_status_code_display(), '301 Moved Permanently') - self.assertEqual(uv.get_redirect_status_code_display(), '404 Not Found') - self.assertEqual(uv.redirect_to, f'{self.live_server_url}/http/404/') - self.assertEqual(uv.type, 'external') - - def test_external_check_get_only_405(self): - # An URL that allows GET but not HEAD, linkcheck should fallback on GET. 
- uv = Url(url=f"{self.live_server_url}/http/getonly/405/") - uv.check_url() - self.assertEqual(uv.message, '200 OK') - self.assertEqual(uv.get_message, 'Working external link') - self.assertEqual(uv.error_message, '') - self.assertEqual(uv.status, True) - self.assertEqual(uv.anchor_message, '') - self.assertEqual(uv.ssl_status, None) - self.assertEqual(uv.ssl_message, 'Insecure link') - self.assertEqual(uv.get_status_code_display(), '200 OK') - self.assertEqual(uv.get_redirect_status_code_display(), None) - self.assertEqual(uv.redirect_to, '') - self.assertEqual(uv.type, 'external') - - def test_external_check_get_only_400(self): - uv = Url(url=f"{self.live_server_url}/http/getonly/400/") - uv.check_url() - self.assertEqual(uv.message, '200 OK') - self.assertEqual(uv.get_message, 'Working external link') - self.assertEqual(uv.error_message, '') - self.assertEqual(uv.status, True) - self.assertEqual(uv.anchor_message, '') - self.assertEqual(uv.ssl_status, None) - self.assertEqual(uv.ssl_message, 'Insecure link') - self.assertEqual(uv.get_status_code_display(), '200 OK') - self.assertEqual(uv.get_redirect_status_code_display(), None) - self.assertEqual(uv.redirect_to, '') - self.assertEqual(uv.type, 'external') - - def test_external_check_blocked_user_agent(self): - uv = Url(url=f"{self.live_server_url}/http/block-user-agent/") - uv.check_url() - self.assertEqual(uv.message, '200 OK') - self.assertEqual(uv.get_message, 'Working external link') - self.assertEqual(uv.error_message, '') - self.assertEqual(uv.status, True) - self.assertEqual(uv.anchor_message, '') - self.assertEqual(uv.ssl_status, None) - self.assertEqual(uv.ssl_message, 'Insecure link') - self.assertEqual(uv.get_status_code_display(), '200 OK') - self.assertEqual(uv.get_redirect_status_code_display(), None) - self.assertEqual(uv.redirect_to, '') - self.assertEqual(uv.type, 'external') - - def test_external_check_blocked_user_agent_blocked_head(self): - uv = 
Url(url=f"{self.live_server_url}/http/block-user-agent/block-head/") - uv.check_url() - self.assertEqual(uv.message, '200 OK') - self.assertEqual(uv.get_message, 'Working external link') - self.assertEqual(uv.error_message, '') - self.assertEqual(uv.status, True) - self.assertEqual(uv.anchor_message, '') - self.assertEqual(uv.ssl_status, None) - self.assertEqual(uv.ssl_message, 'Insecure link') - self.assertEqual(uv.get_status_code_display(), '200 OK') - self.assertEqual(uv.get_redirect_status_code_display(), None) - self.assertEqual(uv.redirect_to, '') - self.assertEqual(uv.type, 'external') - - @patch( - 'linkcheck.models.PROXIES', - {'http': 'http://proxy.example.com:8080/'}, - ) - @requests_mock.Mocker() - def test_external_proxy_request(self, mocker): - mocker.register_uri('HEAD', 'http://test.com/', reason='OK'), - uv = Url(url='http://test.com/') - self.assertEqual(mocker.called, False) - uv.check_url() - self.assertEqual(mocker.called, True) - self.assertEqual(uv.status, True) - self.assertEqual(uv.message, '200 OK') - self.assertEqual(uv.type, 'external') - last_request = mocker.last_request - self.assertEqual(last_request.hostname, 'test.com') - self.assertEqual(last_request.scheme, 'http') - self.assertEqual(last_request.proxies, {'http': 'http://proxy.example.com:8080/'}) - - def test_external_check_timedout(self): - uv = Url(url=f"{self.live_server_url}/timeout/") - uv.check_url() - self.assertEqual(uv.message, 'Other Error: The read operation timed out') - self.assertEqual(uv.get_message, 'The read operation timed out') - self.assertEqual(uv.error_message, 'The read operation timed out') - self.assertEqual(uv.status, False) - self.assertEqual(uv.anchor_message, '') - self.assertEqual(uv.ssl_status, None) - self.assertEqual(uv.ssl_message, 'Insecure link') - self.assertEqual(uv.get_status_code_display(), None) - self.assertEqual(uv.get_redirect_status_code_display(), None) - self.assertEqual(uv.redirect_to, '') - 
self.assertEqual(uv.type, 'external') - - def test_external_check_rate_limit(self): - uv = Url(url=f"{self.live_server_url}/http/429/") - uv.check_url() - self.assertEqual(uv.last_checked, None) - self.assertEqual(uv.message, '429 Too Many Requests') - self.assertEqual(uv.get_message, 'Broken external link') - self.assertEqual(uv.status, False) - self.assertEqual(uv.error_message, '') - self.assertEqual(uv.anchor_message, '') - self.assertEqual(uv.ssl_status, None) - self.assertEqual(uv.ssl_message, 'Insecure link') - self.assertEqual(uv.get_status_code_display(), '429 Too Many Requests') - self.assertEqual(uv.get_redirect_status_code_display(), None) - self.assertEqual(uv.redirect_to, '') - self.assertEqual(uv.type, 'external') - - def test_working_external_anchor(self): - uv = Url(url=f"{self.live_server_url}/http/anchor/#anchor") - uv.check_url() - self.assertEqual(uv.message, "200 OK, working external hash anchor") - self.assertEqual(uv.get_message, 'Working external link') - self.assertEqual(uv.error_message, '') - self.assertEqual(uv.status, True) - self.assertEqual(uv.anchor_message, 'Working anchor') - self.assertEqual(uv.ssl_status, None) - self.assertEqual(uv.ssl_message, 'Insecure link') - self.assertEqual(uv.get_status_code_display(), '200 OK') - self.assertEqual(uv.get_redirect_status_code_display(), None) - self.assertEqual(uv.redirect_to, '') - self.assertEqual(uv.type, 'external') - - @patch("linkcheck.models.TOLERATE_BROKEN_ANCHOR", False) - def test_broken_external_anchor(self): - uv = Url(url=f"{self.live_server_url}/http/anchor/#broken-anchor") - uv.check_url() - self.assertEqual(uv.message, "200 OK, broken external hash anchor") - self.assertEqual(uv.get_message, 'Working external link') - self.assertEqual(uv.error_message, '') - self.assertEqual(uv.status, False) - self.assertEqual(uv.anchor_message, 'Broken anchor') - self.assertEqual(uv.ssl_status, None) - self.assertEqual(uv.ssl_message, 'Insecure link') - 
self.assertEqual(uv.get_status_code_display(), '200 OK') - self.assertEqual(uv.get_redirect_status_code_display(), None) - self.assertEqual(uv.redirect_to, '') - self.assertEqual(uv.type, 'external') - - def test_broken_external_anchor_tolerated(self): - uv = Url(url=f"{self.live_server_url}/http/anchor/#broken-anchor") - uv.check_url() - self.assertEqual(uv.message, "200 OK, broken external hash anchor") - self.assertEqual(uv.get_message, 'Working external link') - self.assertEqual(uv.error_message, '') - self.assertEqual(uv.status, True) - self.assertEqual(uv.anchor_message, 'Broken anchor') - self.assertEqual(uv.ssl_status, None) - self.assertEqual(uv.ssl_message, 'Insecure link') - self.assertEqual(uv.get_status_code_display(), '200 OK') - self.assertEqual(uv.get_redirect_status_code_display(), None) - self.assertEqual(uv.redirect_to, '') - self.assertEqual(uv.type, 'external') - - def test_redirect_working_external_anchor(self): - uv = Url(url=f"{self.live_server_url}/http/redirect_to_anchor/#anchor") - uv.check_url() - self.assertEqual(uv.message, "302 Found, working external hash anchor") - self.assertEqual(uv.get_message, 'Working temporary redirect') - self.assertEqual(uv.error_message, '') - self.assertEqual(uv.status, True) - self.assertEqual(uv.anchor_message, 'Working anchor') - self.assertEqual(uv.ssl_status, None) - self.assertEqual(uv.ssl_message, 'Insecure link') - self.assertEqual(uv.get_status_code_display(), '302 Found') - self.assertEqual(uv.get_redirect_status_code_display(), '200 OK') - self.assertEqual(uv.redirect_to, f'{self.live_server_url}/http/anchor/') - self.assertEqual(uv.type, 'external') - - @patch("linkcheck.models.TOLERATE_BROKEN_ANCHOR", False) - def test_redirect_broken_external_anchor(self): - uv = Url(url=f"{self.live_server_url}/http/redirect_to_anchor/#broken-anchor") - uv.check_url() - self.assertEqual(uv.message, "302 Found, broken external hash anchor") - self.assertEqual(uv.get_message, 'Working temporary redirect') - 
self.assertEqual(uv.error_message, '') - self.assertEqual(uv.status, False) - self.assertEqual(uv.anchor_message, 'Broken anchor') - self.assertEqual(uv.ssl_status, None) - self.assertEqual(uv.ssl_message, 'Insecure link') - self.assertEqual(uv.get_status_code_display(), '302 Found') - self.assertEqual(uv.get_redirect_status_code_display(), '200 OK') - self.assertEqual(uv.redirect_to, f'{self.live_server_url}/http/anchor/') - self.assertEqual(uv.type, 'external') - - def test_redirect_broken_external_anchor_tolerated(self): - uv = Url(url=f"{self.live_server_url}/http/redirect_to_anchor/#broken-anchor") - uv.check_url() - self.assertEqual(uv.message, "302 Found, broken external hash anchor") - self.assertEqual(uv.get_message, 'Working temporary redirect') - self.assertEqual(uv.error_message, '') - self.assertEqual(uv.status, True) - self.assertEqual(uv.anchor_message, 'Broken anchor') - self.assertEqual(uv.ssl_status, None) - self.assertEqual(uv.ssl_message, 'Insecure link') - self.assertEqual(uv.get_status_code_display(), '302 Found') - self.assertEqual(uv.get_redirect_status_code_display(), '200 OK') - self.assertEqual(uv.redirect_to, f'{self.live_server_url}/http/anchor/') - self.assertEqual(uv.type, 'external') - - def test_video_with_time_anchor(self): - uv = Url(url=f"{self.live_server_url}/static-files/video.mp4#t=2.0") - uv.check_url() - self.assertEqual(uv.message, "200 OK") - self.assertEqual(uv.get_message, 'Working external link') - self.assertEqual(uv.error_message, '') - self.assertEqual(uv.status, True) - self.assertEqual(uv.anchor_message, 'Anchor could not be checked') - self.assertEqual(uv.ssl_status, None) - self.assertEqual(uv.ssl_message, 'Insecure link') - self.assertEqual(uv.get_status_code_display(), '200 OK') - self.assertEqual(uv.get_redirect_status_code_display(), None) - self.assertEqual(uv.redirect_to, '') - self.assertEqual(uv.type, 'external') - - def test_forged_video_with_time_anchor(self): - uv = 
Url(url=f"{self.live_server_url}/static-files/fake-video.mp4#", - ) - self.assertEqual( - repr(Link.objects.first()), - ( - ", " - "source: )>" - ), - ) - - -class ChecklinksTestCase(TestCase): - - def test_checklinks_command(self): - Book.objects.create(title='My Title', description=""" - Here's an external link: External, - an internal link: Internal, - and an image: logo""") - - out = StringIO() - call_command('checklinks', stdout=out) - self.assertEqual( - out.getvalue(), - "Checking all links that haven't been tested for 10080 minutes.\n" - "1 internal URLs and 0 external URLs have been checked.\n" - ) - - yesterday = datetime.now() - timedelta(days=1) - Url.objects.all().update(last_checked=yesterday) - out = StringIO() - call_command('checklinks', externalinterval=20, stdout=out) - self.assertEqual( - out.getvalue(), - "Checking all links that haven't been tested for 20 minutes.\n" - "1 internal URLs and 2 external URLs have been checked.\n" - ) - - Url.objects.all().update(last_checked=yesterday) - out = StringIO() - call_command('checklinks', externalinterval=20, limit=1, stdout=out) - self.assertEqual( - out.getvalue(), - "Checking all links that haven't been tested for 20 minutes.\n" - "Will run maximum of 1 checks this run.\n" - "1 internal URLs and 1 external URLs have been checked.\n" - ) - - -class FindingLinksTestCase(TestCase): - def test_found_links(self): - self.assertEqual(Url.objects.all().count(), 0) - Book.objects.create(title='My Title', description=""" - Here's a link: Example, - and an image: logo""") - self.assertEqual(Url.objects.all().count(), 2) - self.assertQuerySetEqual( - Url.objects.all().order_by('url'), - ["http://www.example.org/", "http://www.example.org/logo.png"], - transform=lambda obj: obj.url - ) - - def test_urls_exceeding_max_length(self): - self.assertEqual(Url.objects.all().count(), 0) - with self.assertLogs(logger="linkcheck", level="WARN") as cm: - Book.objects.create( - title="My Title", - 
description=( - "Here's a link: Example, and here's a url exceeding " - f"the max length: logo" - ), - ) - # We skip urls which are too long because we can't store them in the database - self.assertIn( - ( - "WARNING:linkcheck.listeners:URL exceeding max length will be skipped: " - f"http://www.example.org/%7BMAX_URL_LENGTH%20*'X'}" - ), - cm.output, - ) - self.assertEqual(Url.objects.all().count(), 1) - - def test_empty_url_field(self): - """ - Test that URLField empty content is excluded depending on ignore_empty list. - """ - all_linklists = apps.get_app_config('linkcheck').all_linklists - all_linklists['Authors'].ignore_empty = ['website'] - try: - Author.objects.create(name="William Shakespeare") - Author.objects.create(name="John Smith", website="http://www.example.org/smith") - self.assertEqual(Url.objects.all().count(), 1) - finally: - all_linklists['Authors'].ignore_empty = [] - Author.objects.create(name="Alphonse Daudet") - # This time, the empty 'website' is extracted - self.assertEqual(Url.objects.all().count(), 2) - - def test_findlinks_command(self): - # Disable listeners to only check the management command - with disable_listeners(): - Author.objects.create(name="John Smith", website="https://www.example.org/smith") - self.assertEqual( - findlinks(), - "Updating all links...\n" - "Urls: 1 created, 0 deleted, 0 unchanged\n" - "Links: 1 created, 0 deleted, 0 unchanged\n" - ) - Author.objects.create(name="John Doe", website="https://www.example.org/doe") - Book.objects.create( - title='My Title', - description="My fav author: John Doe" - ) - self.assertEqual( - findlinks(), - "Updating all links...\n" - "Urls: 1 created, 0 deleted, 1 unchanged\n" - "Links: 2 created, 0 deleted, 1 unchanged\n" - ) - Author.objects.get(name="John Doe").delete() - self.assertEqual( - findlinks(), - "Updating all links...\n" - "Urls: 0 created, 0 deleted, 2 unchanged\n" - "Links: 0 created, 1 deleted, 2 unchanged\n" - ) - 
Book.objects.first().delete() - self.assertEqual( - findlinks(), - "Updating all links...\n" - "Urls: 0 created, 1 deleted, 1 unchanged\n" - "Links: 0 created, 1 deleted, 1 unchanged\n" - ) - - -class ManagementCommandTestCase(TestCase): - - def test_linkcheck_suggest_config(self): - """ - Test that the config of uncovered models is correctly suggested - """ - out, err = get_command_output('linkcheck_suggest_config') - self.assertEqual( - out, - 'All covered models:\n' - '\x1b[36msampleapp.Book\x1b[0m, \x1b[36msampleapp.Page\x1b[0m\n\n' - 'Suggested config for model sampleapp.UncoveredModel:\n' - '\x1b[36mfrom sampleapp.models import UncoveredModel\n\n' - 'class UncoveredModelLinklist(Linklist):\n' - ' model = UncoveredModel\n\n' - 'linklists = {\n' - ' "UncoveredModel": UncoveredModelLinklist,\n' - '}\n\x1b[0m\n' - ) - self.assertEqual(err, '') - - def test_linkcheck_suggest_config_model(self): - """ - Test that the config of given model is correctly printed - """ - out, err = get_command_output('linkcheck_suggest_config', '--model', 'sampleapp.Author') - self.assertEqual( - out, - 'from sampleapp.models import Author\n\n' - 'class AuthorLinklist(Linklist):\n' - ' model = Author\n\n' - 'linklists = {\n' - ' "Author": AuthorLinklist,\n' - '}\n' - ) - self.assertEqual(err, '') - - def test_linkcheck_suggest_config_model_non_existing(self): - """ - Test that the command raises an error when the model does not exist - """ - with self.assertRaises(CommandError) as cm: - get_command_output('linkcheck_suggest_config', '--model', 'non-existing') - self.assertEqual(str(cm.exception), 'Model "non-existing" does not exist.') - - -class ObjectsUpdateTestCase(TestCase): - def test_update_object(self): - """ - Test that updating a broken URL in an object also updates the - corresponding Link, and don't leak the old URL. 
- """ - bad_url = "/broken/internal/link" - good_url = "/public/" - author = Author.objects.create(name="John Smith", website=bad_url) - self.assertEqual( - Link.objects.filter(ignore=False, url__status=False).count(), - 1 - ) - self.assertEqual( - Link.objects.filter(ignore=False, url__status=True).count(), - 0 - ) - self.assertEqual(Url.objects.all().count(), 1) - self.assertEqual(Url.objects.all()[0].url, bad_url) - # Fix the link - author.website = good_url - author.save() - self.assertEqual( - Link.objects.filter(ignore=False, url__status=False).count(), - 0 - ) - self.assertEqual( - Link.objects.filter(ignore=False, url__status=True).count(), - 1 - ) - self.assertEqual(Url.objects.all().count(), 1) - self.assertEqual(Url.objects.all()[0].url, good_url) - - -class RegisteringTests(TestCase): - good_url = "/public/" - - def test_unregister(self): - self.assertEqual(Link.objects.count(), 0) - unregister_listeners() - Author.objects.create(name="John Smith", website=self.good_url) - self.assertEqual(Link.objects.count(), 0) - register_listeners() - Author.objects.create(name="Jill Smith", website=self.good_url) - self.assertEqual(Link.objects.count(), 1) - - def test_disable_listeners(self): - self.assertEqual(Link.objects.count(), 0) - with disable_listeners(): - Author.objects.create(name="John Smith", website=self.good_url) - self.assertEqual(Link.objects.count(), 0) - Author.objects.create(name="Jill Smith", website=self.good_url) - self.assertEqual(Link.objects.count(), 1) - - def test_enable_listeners(self): - self.assertEqual(Link.objects.count(), 0) - unregister_listeners() - with enable_listeners(): - Author.objects.create(name="John Smith", website=self.good_url) - self.assertEqual(Link.objects.count(), 1) - Author.objects.create(name="Jill Smith", website=self.good_url) - self.assertEqual(Link.objects.count(), 1) - register_listeners() - - -class QueueTests(TestCase): - def test_queue_handling_continue_on_task_crash(self): - assert tasks_queue.empty() 
is True - - def raising(): - raise RuntimeError("Failing task") - - def passing(): - pass - - for func in (raising, passing): - tasks_queue.put({ - 'target': func, - 'args': (), - 'kwargs': {}, - }) - with self.assertLogs() as cm: - linkcheck_worker(block=False) - self.assertEqual( - cm.output[0].split('\n')[0], - 'ERROR:linkcheck.listeners:RuntimeError while running raising with ' - 'args=() and kwargs={}: Failing task' - ) - - -class ViewTestCase(TestCase): - def setUp(self): - self.user = User.objects.create_superuser('admin', 'admin@example.org', 'password') - - def test_display_url(http://github.com/self): - Book.objects.create( - title='My Title', description="Here's a link: Example" - ) - Author.objects.create(name="John Smith", website="http://www.example.org/#john") - self.assertEqual(Link.objects.count(), 2) - self.assertEqual( - set([link.display_url for link in Link.objects.all()]), - set(["http://www.example.org/", "http://www.example.org/#john"]), - ) - - def test_report_view(self): - self.client.force_login(self.user) - response = self.client.get(reverse('linkcheck_report')) - self.assertContains(response, "

Link Checker

") - - def test_report_ignore_unignore(self): - Author.objects.create(name="John Smith", website="http://www.example.org/john") - self.client.force_login(self.user) - link = Link.objects.first() - self.assertFalse(link.ignore) - response = self.client.post( - reverse('linkcheck_report') + f"?ignore={link.pk}", - HTTP_X_REQUESTED_WITH='XMLHttpRequest' - ) - self.assertEqual(response.json(), {'link': link.pk}) - link.refresh_from_db() - self.assertTrue(link.ignore) - response = self.client.post( - reverse('linkcheck_report') + f"?unignore={link.pk}", - HTTP_X_REQUESTED_WITH='XMLHttpRequest' - ) - self.assertEqual(response.json(), {'link': link.pk}) - link.refresh_from_db() - self.assertFalse(link.ignore) - - def test_report_recheck(self): - Author.objects.create(name="John Smith", website="http://www.example.org/john") - self.client.force_login(self.user) - link = Link.objects.first() - response = self.client.post( - reverse('linkcheck_report') + f"?recheck={link.pk}", - HTTP_X_REQUESTED_WITH='XMLHttpRequest' - ) - self.assertEqual(response.json(), { - 'colour': 'red', - 'links': [link.pk], - 'message': '404 Not Found', - }) - - -class GetJqueryMinJsTestCase(TestCase): - def test(self): - self.assertEqual( - 'admin/js/vendor/jquery/jquery.min.js', get_jquery_min_js() - ) - - -class FixtureTestCase(TestCase): - fixtures = ['linkcheck/tests/sampleapp/fixture.json'] - - def test_fixture(self): - self.assertEqual(Book.objects.count(), 1) - self.assertEqual(Page.objects.count(), 1) - - -class FilterCallableTestCase(TestCase): - def test_filter_callable(self): - all_linklists = apps.get_app_config('linkcheck').all_linklists - all_linklists['Journals'].html_fields = [] - Journal.objects.create(title='My Title', description=""" - My description Example""") - Journal.objects.create(title='My Title', version=1, description=""" - My new description Example""") - all_linklists['Journals'].html_fields = ['description'] - # assert there are two versions of the 
same journal - self.assertEqual(Journal.objects.count(), 2) - # assert command just finds the latest version of same journals - self.assertEqual( - findlinks(), - "Updating all links...\n" - "Urls: 1 created, 0 deleted, 0 unchanged\n" - "Links: 1 created, 0 deleted, 0 unchanged\n" - ) - - -def get_command_output(command, *args, **kwargs): - """ - Helper function for running a management command and checking its output - """ - out = StringIO() - err = StringIO() - call_command(command, *args, stdout=out, stderr=err, **kwargs) - return out.getvalue(), err.getvalue() - - -def findlinks(): - """ - Helper function for running the findlinks command and checking its output - """ - return get_command_output('findlinks')[0] diff --git a/linkcheck/tests/urls.py b/linkcheck/tests/urls.py deleted file mode 100644 index 408675f..0000000 --- a/linkcheck/tests/urls.py +++ /dev/null @@ -1,30 +0,0 @@ -from django import http -from django.contrib import admin -from django.urls import include, path -from django.views.generic import RedirectView - -from linkcheck.tests.sampleapp import views - - -def handler404(*args, **kwargs): - return http.HttpResponseNotFound("") - - -urlpatterns = [ - path('admin/linkcheck/', include('linkcheck.urls')), - path('admin/', admin.site.urls), - path('public/', views.http_response, {'code': '200'}), - path('http//', views.http_response), - path('http//rückmeldung/', views.http_response), - path('http/getonly//', views.http_response_get_only), - path('http/block-user-agent/', views.http_block_user_agent), - path('http/block-user-agent/block-head/', views.http_block_user_agent, {'block_head': True}), - path('http/redirect//', views.http_redirect), - path('http/redirect_to_404/', views.http_redirect_to_404), - path('http/redirect_to_anchor/', views.http_redirect_to_anchor), - path('http/brokenredirect/', RedirectView.as_view(url='/non-existent/')), - path('http/anchor/', views.http_response_with_anchor), - path('timeout/', views.timeout), - 
path('static-files/video.mp4', views.static_video), - path('static-files/fake-video.mp4', views.static_video_forged_content_type), -] diff --git a/linkcheck/urls.py b/linkcheck/urls.py deleted file mode 100644 index 10a4f07..0000000 --- a/linkcheck/urls.py +++ /dev/null @@ -1,7 +0,0 @@ -from django.urls import path - -from . import views - -urlpatterns = [ - path('', views.report, name='linkcheck_report'), -] diff --git a/linkcheck/utils.py b/linkcheck/utils.py deleted file mode 100644 index b40dbb5..0000000 --- a/linkcheck/utils.py +++ /dev/null @@ -1,330 +0,0 @@ -import logging -from datetime import timedelta - -from django.apps import apps -from django.db import models -from django.test.client import ClientHandler -from django.utils import timezone - -from .linkcheck_settings import ( - HTML_FIELD_CLASSES, - IMAGE_FIELD_CLASSES, - MAX_URL_LENGTH, - URL_FIELD_CLASSES, -) -from .models import Link, Url - -logger = logging.getLogger(__name__) - - -class LinkCheckHandler(ClientHandler): - # Customize the ClientHandler to allow us removing some middlewares - - def load_middleware(self): - self.ignore_keywords = ["reversion.middleware", "MaintenanceModeMiddleware", "raven_compat"] - super().load_middleware() - new_request_middleware = [] - - #################################################### - # _request_middleware (is removed in newer django) # - #################################################### - if getattr(self, "_request_middleware", None): - for method in self._request_middleware: - ignored = False - for keyword in self.ignore_keywords: - if method.__str__().count(keyword): - ignored = True - break - if not ignored: - new_request_middleware.append(method) - self._request_middleware = new_request_middleware - - #################### - # _view_middleware # - #################### - new_view_middleware = [] - for method in self._view_middleware: - ignored = False - for keyword in self.ignore_keywords: - if method.__str__().count(keyword): - ignored = True - break 
- if not ignored: - new_view_middleware.append(method) - self._view_middleware = new_view_middleware - - ########################## - # _response_middleware## # - ########################## - if getattr(self, "_response_middleware", None): - new_response_middleware = [] - for method in self._response_middleware: - ignored = False - for keyword in self.ignore_keywords: - if method.__str__().count(keyword): - ignored = True - break - if not ignored: - new_response_middleware.append(method) - self._response_middleware = new_response_middleware - - ################################# - # _template_response_middleware # - ################################# - if getattr(self, "_template_response_middleware", None): - new_template_response_middleware = [] - for method in self._template_response_middleware: - ignored = False - for keyword in self.ignore_keywords: - if method.__str__().count(keyword): - ignored = True - break - if not ignored: - new_template_response_middleware.append(method) - self._template_response_middleware = new_template_response_middleware - - ######################### - # _exception_middleware # - ######################### - new_exception_middleware = [] - for method in self._exception_middleware: - ignored = False - for keyword in self.ignore_keywords: - if method.__str__().count(keyword): - ignored = True - break - if not ignored: - new_exception_middleware.append(method) - self._exception_middleware = new_exception_middleware - - -def check_links(external_recheck_interval=10080, limit=-1, check_internal=True, check_external=True): - """ - Return the number of links effectively checked. 
- """ - - urls = Url.objects.all() - - # An optimization for when check_internal is False - if not check_internal: - recheck_datetime = timezone.now() - timedelta(minutes=external_recheck_interval) - urls = urls.exclude(last_checked__gt=recheck_datetime) - - check_count = 0 - for u in urls: - status = u.check_url(http://github.com/check_internal=check_internal,%20check_external=check_external) - check_count += 1 if status is not None else 0 - if -1 < limit <= check_count: - break - - return check_count - - -def update_urls(urls, content_type, object_id): - # Structure of urls param is [(field, link text, url), ... ] - - urls_created = links_created = 0 - new_url_ids = set() - new_link_ids = set() - - for field, link_text, url in urls: - if url is not None and url.startswith("#"): - instance = content_type.get_object_for_this_type(id=object_id) - url = instance.get_absolute_url() + url - - if len(url) > MAX_URL_LENGTH: - # We cannot handle url longer than MAX_URL_LENGTH at the moment - logger.warning("URL exceeding max length will be skipped: %s", url) - continue - - url, url_created = Url.objects.get_or_create(url=url) - - link, link_created = Link.objects.get_or_create( - url=url, - field=field, - text=link_text, - content_type=content_type, - object_id=object_id, - ) - - # Keep track of how many objects were created - urls_created += url_created - links_created += link_created - - # Keep track of object ids (no matter if created or existing) - new_url_ids.add(url.id) - new_link_ids.add(link.id) - - return { - "urls": { - "created": urls_created, - "ids": new_url_ids, - }, - "links": { - "created": links_created, - "ids": new_link_ids, - }, - } - - -def find_all_links(linklists=None): - if linklists is None: - linklists = apps.get_app_config("linkcheck").all_linklists - - urls_created = links_created = 0 - new_url_ids = set() - new_link_ids = set() - - urls_before = Url.objects.count() - links_before = Link.objects.count() - - for linklist_name, 
linklist_cls in linklists.items(): - content_type = linklist_cls.content_type() - linklists = linklist_cls().get_linklist() - - for linklist in linklists: - object_id = linklist["object"].pk - urls = linklist["urls"] + linklist["images"] - if urls: - new = update_urls(urls, content_type, object_id) - - urls_created += new["urls"]["created"] - links_created += new["links"]["created"] - - new_url_ids.update(new["urls"]["ids"]) - new_link_ids.update(new["links"]["ids"]) - - # Delete all urls and links which are no longer part of the link lists - Url.objects.all().exclude(id__in=new_url_ids).delete() - Link.objects.all().exclude(id__in=new_link_ids).delete() - - # Calculate diff - urls_after = Url.objects.count() - links_after = Link.objects.count() - - return { - "urls": { - "created": urls_created, - "deleted": urls_before + urls_created - urls_after, - "unchanged": urls_after - urls_created, - }, - "links": { - "created": links_created, - "deleted": links_before + links_created - links_after, - "unchanged": links_after - links_created, - }, - } - - -def unignore(): - Link.objects.update(ignore=False) - - -# Utilities for testing models coverage - - -def is_interesting_field(field): - return is_url_field(field) or is_image_field(field) or is_html_field(field) - - -def is_url_field(field): - return any(isinstance(field, cls) for cls in URL_FIELD_CLASSES) - - -def is_image_field(field): - return any(isinstance(field, cls) for cls in IMAGE_FIELD_CLASSES) - - -def is_html_field(field): - return any(isinstance(field, cls) for cls in HTML_FIELD_CLASSES) - - -def has_active_field(klass): - return any(field.name == "active" and isinstance(field, models.BooleanField) for field in klass._meta.fields) - - -def get_ignore_empty_fields(klass): - return [field for field in klass._meta.fields if is_interesting_field(field) and (field.blank or field.null)] - - -def get_type_fields(klass, the_type): - check_funcs = { - "html": is_html_field, - "url": is_url_field, - "image": 
is_image_field, - } - check_func = check_funcs[the_type] - return [field for field in klass._meta.fields if check_func(field)] - - -def is_model_covered(klass): - app = apps.get_app_config("linkcheck") - return any(linklist[1].model == klass for linklist in app.all_linklists.items()) - - -def format_config(meta, active_field, html_fields, image_fields, url_fields, ignore_empty_fields): - config = f"from {meta.app_label}.models import {meta.object_name}\n\n" - config += f"class {meta.object_name}Linklist(Linklist):\n" - config += f" model = {meta.object_name}\n" - if html_fields: - config += f" html_fields = [{', '.join(map(str, html_fields))}]\n" - if image_fields: - config += f" image_fields = [{', '.join(map(str, image_fields))}]\n" - if url_fields: - config += f" url_fields = [{', '.join(map(str, url_fields))}]\n" - if ignore_empty_fields: - config += f" ignore_empty = [{', '.join(map(str, ignore_empty_fields))}]\n" - if active_field: - config += ' object_filter = {"active": True}\n' - config += f'\nlinklists = {{\n "{meta.object_name}": {meta.object_name}Linklist,\n}}\n' - return config - - -def get_suggested_linklist_config(klass): - meta = klass._meta - html_fields = get_type_fields(klass, "html") - url_fields = get_type_fields(klass, "url") - image_fields = get_type_fields(klass, "image") - active_field = has_active_field(klass) - ignore_empty_fields = get_ignore_empty_fields(klass) - return format_config( - **{ - "meta": meta, - "html_fields": html_fields, - "url_fields": url_fields, - "image_fields": image_fields, - "active_field": active_field, - "ignore_empty_fields": ignore_empty_fields, - } - ) - - -def get_coverage_data(): - """ - Check which models are covered by linkcheck - This view assumes the key for link - """ - covered = [] - uncovered = [] - for app in apps.get_app_configs(): - for model in app.get_models(): - should_append = False - if getattr(model, "get_absolute_url", None): - should_append = True - else: - for field in model._meta.fields: 
- if is_interesting_field(field): - should_append = True - break - if should_append: - if is_model_covered(model): - covered.append(f"{model._meta.app_label}.{model._meta.object_name}") - else: - uncovered.append( - ( - f"{model._meta.app_label}.{model._meta.object_name}", - get_suggested_linklist_config(model), - ) - ) - - return covered, uncovered diff --git a/linkcheck/views.py b/linkcheck/views.py deleted file mode 100644 index 41de93e..0000000 --- a/linkcheck/views.py +++ /dev/null @@ -1,184 +0,0 @@ -from itertools import groupby -from operator import itemgetter - -from django import forms -from django.contrib.admin.views.decorators import staff_member_required -from django.contrib.contenttypes.models import ContentType -from django.core.exceptions import ObjectDoesNotExist -from django.core.paginator import Paginator -from django.http import JsonResponse -from django.shortcuts import render -from django.templatetags.static import static -from django.urls import NoReverseMatch, reverse -from django.utils.translation import gettext as _ - -from linkcheck import update_lock -from linkcheck.linkcheck_settings import RESULTS_PER_PAGE -from linkcheck.models import Link -from linkcheck.utils import get_coverage_data - - -@staff_member_required -def coverage(request): - - coverage_data = get_coverage_data() - - if request.GET.get('config', False): - # Just render the suggested linklist code - template = 'linkcheck/suggested_configs.html' - context = {'coverage_data': [x['suggested_config'] for x in coverage_data]} - else: - # Render a nice report - template = 'linkcheck/coverage.html' - context = {'coverage_data': coverage_data} - - return render(request, template, context) - - -@staff_member_required -def report(request): - - outerkeyfunc = itemgetter('content_type_id') - content_types_list = [] - - if request.method == 'POST': - - ignore_link_id = request.GET.get('ignore', None) - if ignore_link_id is not None: - link = Link.objects.get(id=ignore_link_id) - 
link.ignore = True - link.save() - if is_ajax(request): - json_data = {'link': link.pk} - return JsonResponse(json_data) - - unignore_link_id = request.GET.get('unignore', None) - if unignore_link_id is not None: - link = Link.objects.get(id=unignore_link_id) - link.ignore = False - link.save() - if is_ajax(request): - json_data = {'link': link.pk} - return JsonResponse(json_data) - - recheck_link_id = request.GET.get('recheck', None) - if recheck_link_id is not None: - link = Link.objects.get(id=recheck_link_id) - url = link.url - url.check_url(http://github.com/external_recheck_interval=0) - links = [x[0] for x in url.links.values_list('id')] - if is_ajax(request): - json_data = ({ - 'links': links, - 'message': url.message, - 'colour': url.colour, - }) - return JsonResponse(json_data) - - link_filter = request.GET.get('filters', 'show_invalid') - - qset = Link.objects.order_by('-url__last_checked') - if link_filter == 'show_valid': - qset = qset.filter(ignore=False, url__status__exact=True) - report_type = _('Valid links') - elif link_filter == 'show_unchecked': - qset = qset.filter(ignore=False, url__last_checked__exact=None) - report_type = _('Untested links') - elif link_filter == 'ignored': - qset = qset.filter(ignore=True) - report_type = _('Ignored links') - else: - qset = qset.filter(ignore=False, url__status__exact=False) - report_type = _('Broken links') - - paginated_links = Paginator(qset, RESULTS_PER_PAGE, 0, True) - - try: - page = int(request.GET.get("page", "1")) - except ValueError: - page = 0 - # offset = (page - 1) * RESULTS_PER_PAGE - links = paginated_links.page(page) - - # This code groups links into nested lists by content type and object id - # It's a bit nasty but we can't use groupby unless be get values() - # instead of a queryset because of the 'Object is not subscriptable' error - - t = sorted(links.object_list.values(), key=outerkeyfunc) - for tk, tg in groupby(t, outerkeyfunc): - innerkeyfunc = itemgetter('object_id') - 
objects = [] - tg = sorted(tg, key=innerkeyfunc) - for ok, og in groupby(tg, innerkeyfunc): - content_type = ContentType.objects.get(pk=tk) - og = list(og) - try: - object = None - if content_type.model_class(): - object = content_type.model_class().objects.get(pk=ok) - except ObjectDoesNotExist: - pass - try: - admin_url = object.get_admin_url() # TODO allow method name to be configurable - except AttributeError: - try: - admin_url = reverse(f'admin:{content_type.app_label}_{content_type.model}_change', args=[ok]) - except NoReverseMatch: - admin_url = None - - objects.append({ - 'object': object, - # Convert values_list back to queryset. Do we need to get values() or do we just need a list of ids? - 'link_list': Link.objects.in_bulk([x['id'] for x in og]).values(), - 'admin_url': admin_url, - }) - content_types_list.append({ - 'content_type': content_type, - 'object_list': objects - }) - - # Pass any querystring data back to the form minus page - rqst = request.GET.copy() - if 'page' in rqst: - del rqst['page'] - - return render(request, 'linkcheck/report.html', { - 'content_types_list': content_types_list, - 'pages': links, - 'filter': link_filter, - 'media': forms.Media(js=[static(get_jquery_min_js())]), - 'qry_data': rqst.urlencode(), - 'report_type': report_type, - 'ignored_count': Link.objects.filter(ignore=True).count(), - }, - ) - - -def get_jquery_min_js(): - """ - Return the location of jquery.min.js. It's an entry point to adapt the path - when it changes in Django. - """ - return 'admin/js/vendor/jquery/jquery.min.js' - - -def get_status_message(): - if update_lock.locked(): - return "Still checking. Please refresh this page in a short while. " - else: - broken_links = Link.objects.filter(ignore=False, url__status=False).count() - if broken_links: - return ( - "We've found {} broken link{}.
" - "View/fix broken links".format( - broken_links, - "s" if broken_links > 1 else "", - reverse('linkcheck_report'), - ) - ) - else: - return '' - - -def is_ajax(request): - return request.META.get('HTTP_X_REQUESTED_WITH') == 'XMLHttpRequest' diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 4206c9c..0000000 --- a/pyproject.toml +++ /dev/null @@ -1,63 +0,0 @@ -[build-system] -requires = ["setuptools>=61.2"] -build-backend = "build_meta" -backend-path = ["linkcheck"] - -[project] -name = "django-linkcheck" -version = "2.4.0" -authors = [ - {name = "Andy Baker", email = "andy@andybak.net"}, - {name = "Fruits Chen", email = "fruitschen@gmail.com"}, - {name = "Tim Graves", email = "gravesit@gmail.com"}, - {name = "Jannis Leidel", email = "jannis@leidel.info"}, - {name = "Claude Paroz", email = "claude@2xlibre.net"}, - {name = "Timo Brembeck", email = "opensource@timo.brembeck.email"} -] -description = "A Django app that will analyze and report on links in any model that you register with it." 
-readme = "README.rst" -classifiers = [ - "Development Status :: 5 - Production/Stable", - "Environment :: Web Environment", - "Intended Audience :: Developers", - "Operating System :: OS Independent", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", - "Programming Language :: Python :: 3.14", - "Framework :: Django", - "Framework :: Django :: 4.2", - "Framework :: Django :: 5.2", - "Framework :: Django :: 6.0", -] -license = "BSD-3-Clause" -license-files = ["LICENSE"] -requires-python = ">=3.10" -dependencies = [ - "django>=4.2", - "requests", -] - -[project.urls] -Homepage = "https://github.com/DjangoAdminHackers/django-linkcheck" -Issues = "https://github.com/DjangoAdminHackers/django-linkcheck/issues" -Changelog = "https://github.com/DjangoAdminHackers/django-linkcheck/blob/master/CHANGELOG" - -[project.optional-dependencies] -dev = [ - "build", - "flake8", - "isort", - "pre-commit", - "requests_mock", -] - -[tool.setuptools] -include-package-data = true - -[tool.setuptools.packages.find] -include = ["linkcheck*"] diff --git a/runtests.py b/runtests.py deleted file mode 100644 index c9e50d7..0000000 --- a/runtests.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python -import sys -from os.path import abspath, dirname - -import django -from django.conf import settings - -if not settings.configured: - test_settings = { - 'DATABASES': {'default': {'ENGINE': 'django.db.backends.sqlite3'}}, - 'STATIC_URL': '/static/', - 'MEDIA_URL': '/media/', - 'INSTALLED_APPS': [ - 'django.contrib.admin', 'django.contrib.auth', - 'django.contrib.sessions', 'django.contrib.contenttypes', - 'django.contrib.messages', - 'linkcheck', 'linkcheck.tests.sampleapp', - ], - 'ROOT_URLCONF': "linkcheck.tests.urls", 
- 'SITE_DOMAIN': "localhost", - 'MIDDLEWARE': [ - 'django.contrib.sessions.middleware.SessionMiddleware', - 'django.contrib.messages.middleware.MessageMiddleware', - 'django.middleware.common.CommonMiddleware', - 'django.middleware.csrf.CsrfViewMiddleware', - 'django.contrib.auth.middleware.AuthenticationMiddleware', - ], - 'TEMPLATES': [{ - 'BACKEND': 'django.template.backends.django.DjangoTemplates', - 'APP_DIRS': True, - 'OPTIONS': { - 'context_processors': [ - 'django.contrib.auth.context_processors.auth', - 'django.contrib.messages.context_processors.messages', - 'django.template.context_processors.static', - 'django.template.context_processors.request', - ], - }, - }], - 'DEFAULT_AUTO_FIELD': 'django.db.models.AutoField', - 'SECRET_KEY': 'arandomstring', - 'LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT': 1, - } - settings.configure(**test_settings) - - -def runtests(*test_args): - from django.test.runner import DiscoverRunner - - parent = dirname(abspath(__file__)) - sys.path.insert(0, parent) - test_runner = DiscoverRunner(verbosity=1, interactive=True) - failures = test_runner.run_tests(test_args) - sys.exit(failures) - - -if __name__ == '__main__': - django.setup() - runtests(*sys.argv[1:])