From 34876b1aa42746fbb73a64ce1a18237fb610dd43 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Wed, 22 Feb 2023 07:09:18 +0100 Subject: [PATCH 01/28] =?UTF-8?q?=F0=9F=A7=AA=20Add=20Python=203.11=20CI?= =?UTF-8?q?=20(#251)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/tests.yml | 2 +- pyproject.toml | 1 + tox.ini | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 64a068e3..4ec6da14 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -31,7 +31,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['pypy-3.7', '3.7', '3.8', '3.9', '3.10'] + python-version: ['pypy-3.7', '3.7', '3.8', '3.9', '3.10', '3.11'] steps: - uses: actions/checkout@v2 diff --git a/pyproject.toml b/pyproject.toml index 5ac1ad27..5cc879ed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ classifiers = [ "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", "Topic :: Software Development :: Libraries :: Python Modules", diff --git a/tox.ini b/tox.ini index f4e117e2..85a7179b 100644 --- a/tox.ini +++ b/tox.ini @@ -9,13 +9,13 @@ envlist = py37 [testenv] usedevelop = true -[testenv:py{37,38,39,310}] +[testenv:py{37,38,39,310,311}] extras = linkify testing commands = pytest {posargs:tests/} -[testenv:py{37,38,39,310}-plugins] +[testenv:py{37,38,39,310,311}-plugins] extras = testing changedir = {envtmpdir} allowlist_externals = From 07e9b7d5a62903e0e95f681b8173bc0d7871abd4 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Mon, 13 Mar 2023 22:12:58 -0600 Subject: [PATCH 02/28] =?UTF-8?q?=F0=9F=A7=AA=20Fix=20fuzzing=20test=20fai?= =?UTF-8?q?lures=20(#254)?= 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From https://github.com/google/oss-fuzz/tree/master/projects/markdown-it-py, fixes issues 55363 and 55367 --- markdown_it/rules_block/blockquote.py | 5 ++++- markdown_it/rules_block/fence.py | 7 +++++-- markdown_it/rules_block/hr.py | 5 ++++- markdown_it/rules_block/list.py | 5 ++++- tests/test_fuzzer.py | 24 ++++++++++++++++++++++++ 5 files changed, 41 insertions(+), 5 deletions(-) create mode 100644 tests/test_fuzzer.py diff --git a/markdown_it/rules_block/blockquote.py b/markdown_it/rules_block/blockquote.py index e00fbf61..965a9e73 100644 --- a/markdown_it/rules_block/blockquote.py +++ b/markdown_it/rules_block/blockquote.py @@ -23,7 +23,10 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool): return False # check the block quote marker - if state.srcCharCode[pos] != 0x3E: # /* > */ + try: + if state.srcCharCode[pos] != 0x3E: # /* > */ + return False + except IndexError: return False pos += 1 diff --git a/markdown_it/rules_block/fence.py b/markdown_it/rules_block/fence.py index fb3c6847..53bc6f2d 100644 --- a/markdown_it/rules_block/fence.py +++ b/markdown_it/rules_block/fence.py @@ -66,8 +66,11 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool): # test break - if state.srcCharCode[pos] != marker: - continue + try: + if state.srcCharCode[pos] != marker: + continue + except IndexError: + break if state.sCount[nextLine] - state.blkIndent >= 4: # closing fence should be indented less than 4 spaces diff --git a/markdown_it/rules_block/hr.py b/markdown_it/rules_block/hr.py index 22c69722..953bba23 100644 --- a/markdown_it/rules_block/hr.py +++ b/markdown_it/rules_block/hr.py @@ -20,7 +20,10 @@ def hr(state: StateBlock, startLine: int, endLine: int, silent: bool): if state.sCount[startLine] - state.blkIndent >= 4: return False - marker = state.srcCharCode[pos] + try: + marker = state.srcCharCode[pos] + except IndexError: + 
return False pos += 1 # Check hr marker: /* * */ /* - */ /* _ */ diff --git a/markdown_it/rules_block/list.py b/markdown_it/rules_block/list.py index 9cf8c402..d9c5e554 100644 --- a/markdown_it/rules_block/list.py +++ b/markdown_it/rules_block/list.py @@ -13,7 +13,10 @@ def skipBulletListMarker(state: StateBlock, startLine: int): pos = state.bMarks[startLine] + state.tShift[startLine] maximum = state.eMarks[startLine] - marker = state.srcCharCode[pos] + try: + marker = state.srcCharCode[pos] + except IndexError: + return -1 pos += 1 # Check bullet /* * */ /* - */ /* + */ if marker != 0x2A and marker != 0x2D and marker != 0x2B: diff --git a/tests/test_fuzzer.py b/tests/test_fuzzer.py new file mode 100644 index 00000000..60cdddaa --- /dev/null +++ b/tests/test_fuzzer.py @@ -0,0 +1,24 @@ +""" +These tests are in response to reports from: +https://github.com/google/oss-fuzz/tree/master/projects/markdown-it-py + +In the future, perhaps atheris could be directly used here, +but it was not directly apparent how to integrate it into pytest. +""" +import pytest + +from markdown_it import MarkdownIt + +TESTS = { + 55363: ">```\n>", + 55367: ">-\n>\n>", + # 55371: "[](so»0;!" 
TODO this did not fail + # 55401: "?c_" * 100_000 TODO this did not fail +} + + +@pytest.mark.parametrize("raw_input", TESTS.values(), ids=TESTS.keys()) +def test_fuzzing(raw_input): + md = MarkdownIt() + md.parse(raw_input) + print(md.render(raw_input)) From e7172489f82b05670674707e23749c2b7ca74ca3 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Sat, 25 Mar 2023 22:56:03 +0100 Subject: [PATCH 03/28] =?UTF-8?q?=F0=9F=94=A7=20Minor=20type=20annotation?= =?UTF-8?q?=20fix=20(#259)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- markdown_it/_compat.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/markdown_it/_compat.py b/markdown_it/_compat.py index 12df1aa6..974d431b 100644 --- a/markdown_it/_compat.py +++ b/markdown_it/_compat.py @@ -4,7 +4,8 @@ import sys from typing import Any +DATACLASS_KWARGS: Mapping[str, Any] if sys.version_info >= (3, 10): - DATACLASS_KWARGS: Mapping[str, Any] = {"slots": True} + DATACLASS_KWARGS = {"slots": True} else: - DATACLASS_KWARGS: Mapping[str, Any] = {} + DATACLASS_KWARGS = {} From 2d46a43bd7a55310452b33f9ed2801f7b5cc4dca Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Tue, 25 Apr 2023 17:16:42 +0200 Subject: [PATCH 04/28] =?UTF-8?q?=F0=9F=A7=AA=20CI:=20Add=20fuzzing=20work?= =?UTF-8?q?flow=20for=20PRs=20(#262)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This action runs fuzzing for a brief period of time, only against the actual code added in the PR. It is intended as a relatively quick check, to guard against code introducing crashes in the Markdown parsing, which should in principle always run against any text input. 
See: https://google.github.io/oss-fuzz/getting-started/continuous-integration/#how-it-works --- .github/workflows/fuzz.yml | 34 ++++++++++++++++++++++++++++++++++ .github/workflows/tests.yml | 1 + 2 files changed, 35 insertions(+) create mode 100644 .github/workflows/fuzz.yml diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml new file mode 100644 index 00000000..7df6dc6b --- /dev/null +++ b/.github/workflows/fuzz.yml @@ -0,0 +1,34 @@ +name: fuzzing + +# This action runs fuzzing for a brief period of time, +# only aginst the actual code added in the PR. +# It is intended a relatively quick check, +# to guard against code introducing crashes in the Markdown parsing, +# which should in principle always run against any text input. +# See: https://google.github.io/oss-fuzz/getting-started/continuous-integration/#how-it-works + +on: + pull_request: + +jobs: + Fuzzing: + runs-on: ubuntu-latest + steps: + - name: Build Fuzzers + id: build + uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master + with: + oss-fuzz-project-name: 'markdown-it-py' + language: python + - name: Run Fuzzers + uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master + with: + oss-fuzz-project-name: 'markdown-it-py' + language: python + fuzz-seconds: 60 + - name: Upload Crash + uses: actions/upload-artifact@v3 + if: failure() && steps.build.outcome == 'success' + with: + name: artifacts + path: ./out/artifacts diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 4ec6da14..a82b59bc 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -54,6 +54,7 @@ jobs: flags: pytests file: ./coverage.xml fail_ci_if_error: true + token: ${{ secrets.CODECOV_TOKEN }} test-plugins: From 84f7be164b555584670281ec214d3cf5efb2f2d0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 25 Apr 2023 18:19:16 +0200 Subject: [PATCH 05/28] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20Bump=20actions/check?= 
=?UTF-8?q?out=20from=202=20to=203=20(#252)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Chris Sewell --- .github/workflows/benchmark.yml | 2 +- .github/workflows/tests.yml | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index c2b82d33..68cc69bb 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python 3.8 uses: actions/setup-python@v4 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a82b59bc..8f1cd70b 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -19,7 +19,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python 3.8 uses: actions/setup-python@v4 with: @@ -34,7 +34,7 @@ jobs: python-version: ['pypy-3.7', '3.7', '3.8', '3.9', '3.10', '3.11'] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: @@ -63,7 +63,7 @@ jobs: matrix: python-version: ['3.8'] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: @@ -83,7 +83,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python 3.8 uses: actions/setup-python@v4 @@ -113,7 +113,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout source - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Set up Python 3.8 uses: actions/setup-python@v4 with: From 505909532975bf0b701dc1b55668df63c28199f9 Mon Sep 17 00:00:00 2001 From: 
"pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 25 Apr 2023 18:26:31 +0200 Subject: [PATCH 06/28] [pre-commit.ci] pre-commit autoupdate (#221) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Chris Sewell --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 50e8132a..139ce525 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,7 +29,7 @@ repos: - id: isort - repo: https://github.com/psf/black - rev: 23.1.0 + rev: 23.3.0 hooks: - id: black @@ -40,7 +40,7 @@ repos: additional_dependencies: [flake8-bugbear~=22.7] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.0.1 + rev: v1.2.0 hooks: - id: mypy additional_dependencies: [mdurl] From d1852a5f8e2cd33602d670cf602593e664c7a2a7 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Tue, 25 Apr 2023 19:42:26 +0200 Subject: [PATCH 07/28] =?UTF-8?q?=F0=9F=94=A7=20Add=20tox=20env=20for=20fu?= =?UTF-8?q?zz=20testcase=20run=20(#263)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To reproduce failing test cases reported by Google's OSS-Fuzz runs --- .github/workflows/fuzz.yml | 2 ++ docs/conf.py | 2 +- scripts/build_fuzzers.py | 42 ++++++++++++++++++++++++++++++ profiler.py => scripts/profiler.py | 0 tox.ini | 9 ++++++- 5 files changed, 53 insertions(+), 2 deletions(-) create mode 100644 scripts/build_fuzzers.py rename profiler.py => scripts/profiler.py (100%) diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml index 7df6dc6b..a74869a5 100644 --- a/.github/workflows/fuzz.yml +++ b/.github/workflows/fuzz.yml @@ -6,9 +6,11 @@ name: fuzzing # to guard against code introducing crashes in the Markdown parsing, # which should in principle always run against any text input. 
# See: https://google.github.io/oss-fuzz/getting-started/continuous-integration/#how-it-works +# Note, to reproduce a crash locally, copy to `testcase` file` and run: `tox -e fuzz` on: pull_request: + paths-ignore: ['docs/**', 'tests/**'] jobs: Fuzzing: diff --git a/docs/conf.py b/docs/conf.py index 786eff04..52deea47 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -102,7 +102,7 @@ def run_apidoc(app): this_folder = os.path.abspath(os.path.dirname(os.path.realpath(__file__))) api_folder = os.path.join(this_folder, "api") module_path = os.path.normpath(os.path.join(this_folder, "../")) - ignore_paths = ["../profiler.py", "../conftest.py", "../tests", "../benchmarking"] + ignore_paths = ["../scripts", "../conftest.py", "../tests", "../benchmarking"] ignore_paths = [ os.path.normpath(os.path.join(this_folder, p)) for p in ignore_paths ] diff --git a/scripts/build_fuzzers.py b/scripts/build_fuzzers.py new file mode 100644 index 00000000..3dce8ddf --- /dev/null +++ b/scripts/build_fuzzers.py @@ -0,0 +1,42 @@ +"""Build fuzzers idempotently in a given folder.""" +import argparse +from pathlib import Path +import subprocess + + +def main(): + """Build fuzzers idempotently in a given folder.""" + parser = argparse.ArgumentParser() + parser.add_argument("folder") + args = parser.parse_args() + folder = Path(args.folder) + if not folder.exists(): + print(f"Cloning google/oss-fuzz into: {folder}") + folder.mkdir(parents=True) + subprocess.check_call( + [ + "git", + "clone", + "--single-branch", + "https://github.com/google/oss-fuzz", + str(folder), + ] + ) + else: + print(f"Using google/oss-fuzz in: {folder}") + if not (folder / "build").exists(): + print(f"Building fuzzers in: {folder / 'build'}") + subprocess.check_call( + [ + "python", + str(folder / "infra" / "helper.py"), + "build_fuzzers", + "markdown-it-py", + ] + ) + else: + print(f"Using existing fuzzers in: {folder / 'build'}") + + +if __name__ == "__main__": + main() diff --git a/profiler.py 
b/scripts/profiler.py similarity index 100% rename from profiler.py rename to scripts/profiler.py diff --git a/tox.ini b/tox.ini index 85a7179b..bf0c8367 100644 --- a/tox.ini +++ b/tox.ini @@ -55,11 +55,18 @@ allowlist_externals = dot commands = mkdir -p "{toxworkdir}/prof" - python -m cProfile -o "{toxworkdir}/prof/output.pstats" profiler.py + python -m cProfile -o "{toxworkdir}/prof/output.pstats" scripts/profiler.py gprof2dot -f pstats -o "{toxworkdir}/prof/output.dot" "{toxworkdir}/prof/output.pstats" dot -Tsvg -o "{toxworkdir}/prof/output.svg" "{toxworkdir}/prof/output.dot" python -c 'import pathlib; print("profiler svg output under file://\{0\}".format(pathlib.Path(r"{toxworkdir}") / "prof" / "output.svg"))' +[testenv:fuzz] +description = run fuzzer on testcase file +; See: https://google.github.io/oss-fuzz/ +deps = atheris +commands_pre = python scripts/build_fuzzers.py {envdir}/oss-fuzz +commands = python {envdir}/oss-fuzz/infra/helper.py reproduce markdown-it-py fuzz_markdown {posargs:testcase} + [flake8] max-line-length = 100 extend-ignore = E203 From baa8658d0c7d8eceec17b2384109a63ad27bfc05 Mon Sep 17 00:00:00 2001 From: DavidKorczynski Date: Wed, 31 May 2023 18:56:44 +0100 Subject: [PATCH 08/28] =?UTF-8?q?=F0=9F=A7=AA=20Add=20OSS-Fuzz=20set=20up?= =?UTF-8?q?=20(#255)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Chris Sewell --- tests/fuzz/README.md | 41 +++++++++++++++++++++ tests/fuzz/fuzz_markdown.py | 23 ++++++++++++ tests/fuzz/fuzz_markdown_extended.py | 53 ++++++++++++++++++++++++++++ 3 files changed, 117 insertions(+) create mode 100644 tests/fuzz/README.md create mode 100644 tests/fuzz/fuzz_markdown.py create mode 100644 tests/fuzz/fuzz_markdown_extended.py diff --git a/tests/fuzz/README.md b/tests/fuzz/README.md new file mode 100644 index 00000000..87075a70 --- /dev/null +++ 
b/tests/fuzz/README.md @@ -0,0 +1,41 @@ +# OSS-Fuzz integration + +In principle, core Markdown parsing is designed to never except/crash on any input, +and so [fuzzing](https://en.wikipedia.org/wiki/Fuzzing) can be used to test this conformance. +This folder contains fuzzers which are principally run downstream as part of the infrastructure. + +Any file that matches `fuzz_*.py` in this repository will be built and run on OSS-Fuzz +(see ). + +See for full details. + +## CI integration + +Fuzzing essentially runs forever, or until a crash is found, therefore it cannot be fully integrated into local continous integration testing. +The workflow in `.github/workflows/fuzz.yml` though runs a brief fuzzing on code changed in a PR, +which can be used to provide early warning on code changes. + +## Reproducing crash failures + +If OSS-Fuzz (or the CI workflow) identifies a crash, it will produce a "minimized testcase" file +(e.g. ). + +To reproduce this crash locally, the easiest way is to run the [tox](https://tox.wiki/) environment, provided in this repository, against the test file (see `tox.ini`): + +``` +tox -e fuzz path/to/testcase +``` + +This idempotently sets up a local python environment with markdown-it-py (local dev) and [Atheris](https://pypi.org/project/atheris/) installed, +clones into it, +and builds the fuzzers. +Then the testcase is run within this environment. 
+ +If you wish to simply run the full fuzzing process, +you can activate this environment, then run e.g.: + +``` +python .tox/fuzz/oss-fuzz/infra/helper.py run_fuzzer markdown-it-py fuzz_markdown +``` + +For a more thorough guide on reproducing, see: https://google.github.io/oss-fuzz/advanced-topics/reproducing/ diff --git a/tests/fuzz/fuzz_markdown.py b/tests/fuzz/fuzz_markdown.py new file mode 100644 index 00000000..d78ef697 --- /dev/null +++ b/tests/fuzz/fuzz_markdown.py @@ -0,0 +1,23 @@ +import sys + +import atheris + +from markdown_it import MarkdownIt + + +def TestOneInput(data): + fdp = atheris.FuzzedDataProvider(data) + md = MarkdownIt() + raw_markdown = fdp.ConsumeUnicodeNoSurrogates(sys.maxsize) + md.parse(raw_markdown) + md.render(raw_markdown) + + +def main(): + atheris.instrument_all() + atheris.Setup(sys.argv, TestOneInput) + atheris.Fuzz() + + +if __name__ == "__main__": + main() diff --git a/tests/fuzz/fuzz_markdown_extended.py b/tests/fuzz/fuzz_markdown_extended.py new file mode 100644 index 00000000..4ba749ee --- /dev/null +++ b/tests/fuzz/fuzz_markdown_extended.py @@ -0,0 +1,53 @@ +import sys + +import atheris + +# Beautified from auto-generated fuzzer at: +# https://github.com/ossf/fuzz-introspector/pull/872#issuecomment-1450847118 +# Auto-fuzz heuristics used: py-autofuzz-heuristics-4.1 +# Imports by the generated code +import markdown_it + + +def TestOneInput(data): + fdp = atheris.FuzzedDataProvider(data) + val_1 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 1024)) + val_2 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 1024)) + val_3 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) + val_4 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) + val_5 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) + val_6 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) + val_7 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) + val_8 = 
fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) + val_9 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) + val_10 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) + + try: + c1 = markdown_it.main.MarkdownIt() + c1.render(val_1) + c1.parse(val_2) + c1.renderInline(val_3) + c1.parseInline(val_4) + c1.normalizeLink(val_5) + c1.normalizeLinkText(val_6) + c1.disable(val_7) + c1.enable(val_8) + c1.validateLink(val_9) + c1.configure(val_10) + except ( + ValueError, + KeyError, + TypeError, + ): + # Exceptions thrown by the hit code. + pass + + +def main(): + atheris.instrument_all() + atheris.Setup(sys.argv, TestOneInput) + atheris.Fuzz() + + +if __name__ == "__main__": + main() From 1ea54577f0b3883a822f778064b6cb35708a4e1b Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Wed, 31 May 2023 20:03:53 +0200 Subject: [PATCH 09/28] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20Drop=20Python=203.7?= =?UTF-8?q?=20(#264)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Python 3.7 is end-of-life on 2023-06-27 https://devguide.python.org/versions/ --- .github/workflows/tests.yml | 6 +++--- .readthedocs.yml | 2 +- docs/conf.py | 2 +- markdown_it/renderer.py | 7 +------ pyproject.toml | 4 +--- tox.ini | 10 +++++----- 6 files changed, 12 insertions(+), 19 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 8f1cd70b..ef1bf557 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -31,7 +31,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['pypy-3.7', '3.7', '3.8', '3.9', '3.10', '3.11'] + python-version: ['pypy-3.8', '3.8', '3.9', '3.10', '3.11'] steps: - uses: actions/checkout@v3 @@ -47,10 +47,10 @@ jobs: run: | pytest tests/ --cov=markdown_it --cov-report=xml --cov-report=term-missing - name: Upload to Codecov - if: matrix.python-version == '3.7' && github.repository == 'executablebooks/markdown-it-py' + if: matrix.python-version == 
'3.8' && github.repository == 'executablebooks/markdown-it-py' uses: codecov/codecov-action@v3 with: - name: markdown-it-py-pytests-py3.7 + name: markdown-it-py-pytests flags: pytests file: ./coverage.xml fail_ci_if_error: true diff --git a/.readthedocs.yml b/.readthedocs.yml index 32b74c8f..1d750008 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -1,7 +1,7 @@ version: 2 python: - version: 3 + version: "3.8" install: - method: pip path: . diff --git a/docs/conf.py b/docs/conf.py index 52deea47..08a6e78a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -80,7 +80,7 @@ intersphinx_mapping = { - "python": ("https://docs.python.org/3.7", None), + "python": ("https://docs.python.org/3.8", None), "mdit-py-plugins": ("https://mdit-py-plugins.readthedocs.io/en/latest/", None), } diff --git a/markdown_it/renderer.py b/markdown_it/renderer.py index 81d0bc37..2d784826 100644 --- a/markdown_it/renderer.py +++ b/markdown_it/renderer.py @@ -9,17 +9,12 @@ class Renderer from collections.abc import MutableMapping, Sequence import inspect -from typing import Any, ClassVar +from typing import Any, ClassVar, Protocol from .common.utils import escapeHtml, unescapeAll from .token import Token from .utils import OptionsDict -try: - from typing import Protocol -except ImportError: # Python <3.8 doesn't have `Protocol` in the stdlib - from typing_extensions import Protocol # type: ignore - class RendererProtocol(Protocol): __output__: ClassVar[str] diff --git a/pyproject.toml b/pyproject.toml index 5cc879ed..da8d9170 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,6 @@ classifiers = [ "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", @@ -25,10 +24,9 @@ classifiers = [ "Topic :: Text Processing :: Markup", ] 
keywords = ["markdown", "lexer", "parser", "commonmark", "markdown-it"] -requires-python = ">=3.7" +requires-python = ">=3.8" dependencies = [ "mdurl~=0.1", - "typing_extensions>=3.7.4;python_version<'3.8'", ] [project.urls] diff --git a/tox.ini b/tox.ini index bf0c8367..251e18df 100644 --- a/tox.ini +++ b/tox.ini @@ -4,18 +4,18 @@ # then run `tox` or `tox -- {pytest args}` # run in parallel using `tox -p` [tox] -envlist = py37 +envlist = py38 [testenv] usedevelop = true -[testenv:py{37,38,39,310,311}] +[testenv:py{38,39,310,311}] extras = linkify testing commands = pytest {posargs:tests/} -[testenv:py{37,38,39,310,311}-plugins] +[testenv:py{38,39,310,311}-plugins] extras = testing changedir = {envtmpdir} allowlist_externals = @@ -27,11 +27,11 @@ commands_pre = commands = pytest {posargs} -[testenv:py{37,38,39}-bench-core] +[testenv:py{38,39,310,311}-bench-core] extras = benchmarking commands = pytest benchmarking/bench_core.py {posargs} -[testenv:py{37,38}-bench-packages] +[testenv:py{38,39,310,311}-bench-packages] extras = benchmarking,compare commands = pytest benchmarking/bench_packages.py {posargs} From 83d66d4fb2f6161125f7302596edb4cc6b15e163 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Wed, 31 May 2023 20:58:46 +0200 Subject: [PATCH 10/28] =?UTF-8?q?=F0=9F=94=A7=20MAINTAIN:=20Make=20type=20?= =?UTF-8?q?checking=20strict=20(#267)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit and introduce `TypeDict` allowed by Python 3.8+ --- .pre-commit-config.yaml | 6 + docs/conf.py | 23 +-- markdown_it/_punycode.py | 5 +- markdown_it/common/normalize_url.py | 2 +- markdown_it/common/utils.py | 31 +--- markdown_it/helpers/parse_link_destination.py | 2 +- markdown_it/helpers/parse_link_title.py | 4 +- markdown_it/main.py | 54 +++++-- markdown_it/parser_block.py | 27 ++-- markdown_it/parser_core.py | 2 +- markdown_it/parser_inline.py | 12 +- markdown_it/presets/__init__.py | 3 +- markdown_it/presets/commonmark.py | 3 +- 
markdown_it/presets/default.py | 3 +- markdown_it/presets/zero.py | 3 +- markdown_it/renderer.py | 42 +++--- markdown_it/ruler.py | 56 +++++-- markdown_it/rules_block/blockquote.py | 2 +- markdown_it/rules_block/code.py | 2 +- markdown_it/rules_block/fence.py | 2 +- markdown_it/rules_block/heading.py | 2 +- markdown_it/rules_block/hr.py | 2 +- markdown_it/rules_block/html_block.py | 4 +- markdown_it/rules_block/lheading.py | 2 +- markdown_it/rules_block/list.py | 8 +- markdown_it/rules_block/paragraph.py | 2 +- markdown_it/rules_block/reference.py | 2 +- markdown_it/rules_block/state_block.py | 19 +-- markdown_it/rules_block/table.py | 10 +- markdown_it/rules_core/replacements.py | 2 +- markdown_it/rules_core/state_core.py | 6 +- markdown_it/rules_inline/balance_pairs.py | 10 +- markdown_it/rules_inline/emphasis.py | 7 +- markdown_it/rules_inline/entity.py | 2 +- markdown_it/rules_inline/escape.py | 2 +- markdown_it/rules_inline/html_inline.py | 4 +- markdown_it/rules_inline/image.py | 2 +- markdown_it/rules_inline/link.py | 2 +- markdown_it/rules_inline/newline.py | 2 +- markdown_it/rules_inline/state_inline.py | 18 +-- markdown_it/rules_inline/strikethrough.py | 6 +- markdown_it/rules_inline/text.py | 5 +- markdown_it/rules_inline/text_collapse.py | 2 +- markdown_it/token.py | 10 +- markdown_it/tree.py | 18 ++- markdown_it/utils.py | 138 ++++++++++++------ pyproject.toml | 9 +- tests/test_api/test_main.py | 14 +- tests/test_api/test_token.py | 2 +- tests/test_linkify.py | 1 + tests/test_port/test_references.py | 2 +- tests/test_tree.py | 6 +- 52 files changed, 375 insertions(+), 230 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 139ce525..49f45ed2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -44,3 +44,9 @@ repos: hooks: - id: mypy additional_dependencies: [mdurl] + exclude: > + (?x)^( + benchmarking/.*\.py| + docs/.*\.py| + scripts/.*\.py| + )$ diff --git a/docs/conf.py b/docs/conf.py index 
08a6e78a..e0a6e621 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -44,19 +44,20 @@ exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] nitpicky = True -nitpick_ignore = [ - ("py:class", "Match"), - ("py:class", "Path"), - ("py:class", "x in the interval [0, 1)."), - ("py:class", "markdown_it.helpers.parse_link_destination._Result"), - ("py:class", "markdown_it.helpers.parse_link_title._Result"), - ("py:class", "MarkdownIt"), - ("py:class", "RuleFunc"), - ("py:class", "_NodeType"), - ("py:class", "typing_extensions.Protocol"), +nitpick_ignore_regex = [ + ("py:.*", name) + for name in ( + "_ItemTV", + ".*_NodeType", + ".*Literal.*", + ".*_Result", + "EnvType", + "RuleFunc", + "Path", + "Ellipsis", + ) ] - # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for diff --git a/markdown_it/_punycode.py b/markdown_it/_punycode.py index 9ad24421..f9baad27 100644 --- a/markdown_it/_punycode.py +++ b/markdown_it/_punycode.py @@ -22,6 +22,7 @@ import codecs import re +from typing import Callable REGEX_SEPARATORS = re.compile(r"[\x2E\u3002\uFF0E\uFF61]") REGEX_NON_ASCII = re.compile(r"[^\0-\x7E]") @@ -32,10 +33,10 @@ def encode(uni: str) -> str: def decode(ascii: str) -> str: - return codecs.decode(ascii, encoding="punycode") # type: ignore[call-overload] + return codecs.decode(ascii, encoding="punycode") # type: ignore -def map_domain(string, fn): +def map_domain(string: str, fn: Callable[[str], str]) -> str: parts = string.split("@") result = "" if len(parts) > 1: diff --git a/markdown_it/common/normalize_url.py b/markdown_it/common/normalize_url.py index afec9284..a4ebbaae 100644 --- a/markdown_it/common/normalize_url.py +++ b/markdown_it/common/normalize_url.py @@ -68,7 +68,7 @@ def normalizeLinkText(url: str) -> str: GOOD_DATA_RE = re.compile(r"^data:image\/(gif|png|jpeg|webp);") -def validateLink(url: str, validator: Callable | None = None) -> bool: +def 
validateLink(url: str, validator: Callable[[str], bool] | None = None) -> bool: """Validate URL link is allowed in output. This validator can prohibit more than really needed to prevent XSS. diff --git a/markdown_it/common/utils.py b/markdown_it/common/utils.py index 9b7c4aeb..ed862e74 100644 --- a/markdown_it/common/utils.py +++ b/markdown_it/common/utils.py @@ -1,8 +1,10 @@ """Utilities for parsing source text """ +from __future__ import annotations + import html import re -from typing import Any +from typing import Any, Match, TypeVar from .entities import entities @@ -22,29 +24,12 @@ def charCodeAt(src: str, pos: int) -> Any: return None -# Merge objects -# -def assign(obj): - """Merge objects /*from1, from2, from3, ...*/)""" - raise NotImplementedError - # sources = Array.prototype.slice.call(arguments, 1) - - # sources.forEach(function (source) { - # if (!source) { return; } - - # if (typeof source !== 'object') { - # throw new TypeError(source + 'must be object') - # } - - # Object.keys(source).forEach(function (key) { - # obj[key] = source[key] - # }) - # }) - - # return obj +_ItemTV = TypeVar("_ItemTV") -def arrayReplaceAt(src: list, pos: int, newElements: list) -> list: +def arrayReplaceAt( + src: list[_ItemTV], pos: int, newElements: list[_ItemTV] +) -> list[_ItemTV]: """ Remove element from array and put another array at those position. 
Useful for some operations with tokens @@ -133,7 +118,7 @@ def unescapeMd(string: str) -> str: def unescapeAll(string: str) -> str: - def replacer_func(match): + def replacer_func(match: Match[str]) -> str: escaped = match.group(1) if escaped: return escaped diff --git a/markdown_it/helpers/parse_link_destination.py b/markdown_it/helpers/parse_link_destination.py index 58b76f3c..d527ce0c 100644 --- a/markdown_it/helpers/parse_link_destination.py +++ b/markdown_it/helpers/parse_link_destination.py @@ -8,7 +8,7 @@ class _Result: __slots__ = ("ok", "pos", "lines", "str") - def __init__(self): + def __init__(self) -> None: self.ok = False self.pos = 0 self.lines = 0 diff --git a/markdown_it/helpers/parse_link_title.py b/markdown_it/helpers/parse_link_title.py index 842c83bc..8f589336 100644 --- a/markdown_it/helpers/parse_link_title.py +++ b/markdown_it/helpers/parse_link_title.py @@ -6,13 +6,13 @@ class _Result: __slots__ = ("ok", "pos", "lines", "str") - def __init__(self): + def __init__(self) -> None: self.ok = False self.pos = 0 self.lines = 0 self.str = "" - def __str__(self): + def __str__(self) -> str: return self.str diff --git a/markdown_it/main.py b/markdown_it/main.py index 7faac5ad..acf8d079 100644 --- a/markdown_it/main.py +++ b/markdown_it/main.py @@ -2,7 +2,7 @@ from collections.abc import Callable, Generator, Iterable, Mapping, MutableMapping from contextlib import contextmanager -from typing import Any +from typing import Any, Literal, overload from . 
import helpers, presets # noqa F401 from .common import normalize_url, utils # noqa F401 @@ -12,7 +12,7 @@ from .renderer import RendererHTML, RendererProtocol from .rules_core.state_core import StateCore from .token import Token -from .utils import OptionsDict +from .utils import EnvType, OptionsDict, OptionsType, PresetType try: import linkify_it @@ -20,7 +20,7 @@ linkify_it = None -_PRESETS = { +_PRESETS: dict[str, PresetType] = { "default": presets.default.make(), "js-default": presets.js_default.make(), "zero": presets.zero.make(), @@ -32,8 +32,8 @@ class MarkdownIt: def __init__( self, - config: str | Mapping = "commonmark", - options_update: Mapping | None = None, + config: str | PresetType = "commonmark", + options_update: Mapping[str, Any] | None = None, *, renderer_cls: Callable[[MarkdownIt], RendererProtocol] = RendererHTML, ): @@ -67,6 +67,26 @@ def __init__( def __repr__(self) -> str: return f"{self.__class__.__module__}.{self.__class__.__name__}()" + @overload + def __getitem__(self, name: Literal["inline"]) -> ParserInline: + ... + + @overload + def __getitem__(self, name: Literal["block"]) -> ParserBlock: + ... + + @overload + def __getitem__(self, name: Literal["core"]) -> ParserCore: + ... + + @overload + def __getitem__(self, name: Literal["renderer"]) -> RendererProtocol: + ... + + @overload + def __getitem__(self, name: str) -> Any: + ... + def __getitem__(self, name: str) -> Any: return { "inline": self.inline, @@ -75,7 +95,7 @@ def __getitem__(self, name: str) -> Any: "renderer": self.renderer, }[name] - def set(self, options: MutableMapping) -> None: + def set(self, options: OptionsType) -> None: """Set parser options (in the same format as in constructor). Probably, you will never need it, but you can change options after constructor call. 
@@ -86,7 +106,7 @@ def set(self, options: MutableMapping) -> None: self.options = OptionsDict(options) def configure( - self, presets: str | Mapping, options_update: Mapping | None = None + self, presets: str | PresetType, options_update: Mapping[str, Any] | None = None ) -> MarkdownIt: """Batch load of all options and component settings. This is an internal method, and you probably will not need it. @@ -108,9 +128,9 @@ def configure( options = config.get("options", {}) or {} if options_update: - options = {**options, **options_update} + options = {**options, **options_update} # type: ignore - self.set(options) + self.set(options) # type: ignore if "components" in config: for name, component in config["components"].items(): @@ -206,7 +226,9 @@ def reset_rules(self) -> Generator[None, None, None]: self[chain].ruler.enableOnly(rules) self.inline.ruler2.enableOnly(chain_rules["inline2"]) - def add_render_rule(self, name: str, function: Callable, fmt: str = "html") -> None: + def add_render_rule( + self, name: str, function: Callable[..., Any], fmt: str = "html" + ) -> None: """Add a rule for rendering a particular Token type. Only applied when ``renderer.__output__ == fmt`` @@ -214,7 +236,9 @@ def add_render_rule(self, name: str, function: Callable, fmt: str = "html") -> N if self.renderer.__output__ == fmt: self.renderer.rules[name] = function.__get__(self.renderer) # type: ignore - def use(self, plugin: Callable, *params, **options) -> MarkdownIt: + def use( + self, plugin: Callable[..., None], *params: Any, **options: Any + ) -> MarkdownIt: """Load specified plugin with given params into current parser instance. (chainable) It's just a sugar to call `plugin(md, params)` with curring. 
@@ -229,7 +253,7 @@ def func(tokens, idx): plugin(self, *params, **options) return self - def parse(self, src: str, env: MutableMapping | None = None) -> list[Token]: + def parse(self, src: str, env: EnvType | None = None) -> list[Token]: """Parse the source string to a token stream :param src: source string @@ -252,7 +276,7 @@ def parse(self, src: str, env: MutableMapping | None = None) -> list[Token]: self.core.process(state) return state.tokens - def render(self, src: str, env: MutableMapping | None = None) -> Any: + def render(self, src: str, env: EnvType | None = None) -> Any: """Render markdown string into html. It does all magic for you :). :param src: source string @@ -266,7 +290,7 @@ def render(self, src: str, env: MutableMapping | None = None) -> Any: env = {} if env is None else env return self.renderer.render(self.parse(src, env), self.options, env) - def parseInline(self, src: str, env: MutableMapping | None = None) -> list[Token]: + def parseInline(self, src: str, env: EnvType | None = None) -> list[Token]: """The same as [[MarkdownIt.parse]] but skip all block rules. :param src: source string @@ -286,7 +310,7 @@ def parseInline(self, src: str, env: MutableMapping | None = None) -> list[Token self.core.process(state) return state.tokens - def renderInline(self, src: str, env: MutableMapping | None = None) -> Any: + def renderInline(self, src: str, env: EnvType | None = None) -> Any: """Similar to [[MarkdownIt.render]] but for single paragraph content. :param src: source string diff --git a/markdown_it/parser_block.py b/markdown_it/parser_block.py index f331ec54..cd240a8a 100644 --- a/markdown_it/parser_block.py +++ b/markdown_it/parser_block.py @@ -2,20 +2,25 @@ from __future__ import annotations import logging +from typing import TYPE_CHECKING, Any from . 
import rules_block from .ruler import Ruler from .rules_block.state_block import StateBlock from .token import Token +from .utils import EnvType + +if TYPE_CHECKING: + from markdown_it import MarkdownIt LOGGER = logging.getLogger(__name__) -_rules: list[tuple] = [ +_rules: list[tuple[str, Any, list[str]]] = [ # First 2 params - rule name & source. Secondary array - list of rules, # which can be terminated by this one. ("table", rules_block.table, ["paragraph", "reference"]), - ("code", rules_block.code), + ("code", rules_block.code, []), ("fence", rules_block.fence, ["paragraph", "reference", "blockquote", "list"]), ( "blockquote", @@ -24,11 +29,11 @@ ), ("hr", rules_block.hr, ["paragraph", "reference", "blockquote", "list"]), ("list", rules_block.list_block, ["paragraph", "reference", "blockquote"]), - ("reference", rules_block.reference), + ("reference", rules_block.reference, []), ("html_block", rules_block.html_block, ["paragraph", "reference", "blockquote"]), ("heading", rules_block.heading, ["paragraph", "reference", "blockquote"]), - ("lheading", rules_block.lheading), - ("paragraph", rules_block.paragraph), + ("lheading", rules_block.lheading, []), + ("paragraph", rules_block.paragraph, []), ] @@ -39,12 +44,10 @@ class ParserBlock: [[Ruler]] instance. Keep configuration of block rules. """ - def __init__(self): + def __init__(self) -> None: self.ruler = Ruler() - for data in _rules: - name = data[0] - rule = data[1] - self.ruler.push(name, rule, {"alt": data[2] if len(data) > 2 else []}) + for name, rule, alt in _rules: + self.ruler.push(name, rule, {"alt": alt}) def tokenize( self, state: StateBlock, startLine: int, endLine: int, silent: bool = False @@ -96,8 +99,8 @@ def tokenize( def parse( self, src: str, - md, - env, + md: MarkdownIt, + env: EnvType, outTokens: list[Token], ords: tuple[int, ...] 
| None = None, ) -> list[Token] | None: diff --git a/markdown_it/parser_core.py b/markdown_it/parser_core.py index 32209b32..251b7634 100644 --- a/markdown_it/parser_core.py +++ b/markdown_it/parser_core.py @@ -21,7 +21,7 @@ class ParserCore: - def __init__(self): + def __init__(self) -> None: self.ruler = Ruler() for name, rule in _rules: self.ruler.push(name, rule) diff --git a/markdown_it/parser_inline.py b/markdown_it/parser_inline.py index b61c990b..a8228524 100644 --- a/markdown_it/parser_inline.py +++ b/markdown_it/parser_inline.py @@ -2,10 +2,16 @@ """ from __future__ import annotations +from typing import TYPE_CHECKING + from . import rules_inline from .ruler import RuleFunc, Ruler from .rules_inline.state_inline import StateInline from .token import Token +from .utils import EnvType + +if TYPE_CHECKING: + from markdown_it import MarkdownIt # Parser rules _rules: list[tuple[str, RuleFunc]] = [ @@ -31,7 +37,7 @@ class ParserInline: - def __init__(self): + def __init__(self) -> None: self.ruler = Ruler() for name, rule in _rules: self.ruler.push(name, rule) @@ -114,7 +120,9 @@ def tokenize(self, state: StateInline) -> None: if state.pending: state.pushPending() - def parse(self, src: str, md, env, tokens: list[Token]) -> list[Token]: + def parse( + self, src: str, md: MarkdownIt, env: EnvType, tokens: list[Token] + ) -> list[Token]: """Process input string and push inline tokens into `tokens`""" state = StateInline(src, md, env, tokens) self.tokenize(state) diff --git a/markdown_it/presets/__init__.py b/markdown_it/presets/__init__.py index 16f10e51..22cf74cb 100644 --- a/markdown_it/presets/__init__.py +++ b/markdown_it/presets/__init__.py @@ -1,6 +1,7 @@ __all__ = ("commonmark", "default", "zero", "js_default", "gfm_like") from . 
import commonmark, default, zero +from ..utils import PresetType js_default = default @@ -16,7 +17,7 @@ class gfm_like: """ @staticmethod - def make(): + def make() -> PresetType: config = commonmark.make() config["components"]["core"]["rules"].append("linkify") config["components"]["block"]["rules"].append("table") diff --git a/markdown_it/presets/commonmark.py b/markdown_it/presets/commonmark.py index e44b66bb..60a39250 100644 --- a/markdown_it/presets/commonmark.py +++ b/markdown_it/presets/commonmark.py @@ -6,9 +6,10 @@ - block: table - inline: strikethrough """ +from ..utils import PresetType -def make(): +def make() -> PresetType: return { "options": { "maxNesting": 20, # Internal protection, recursion limit diff --git a/markdown_it/presets/default.py b/markdown_it/presets/default.py index 59f4855e..c9ab902d 100644 --- a/markdown_it/presets/default.py +++ b/markdown_it/presets/default.py @@ -1,7 +1,8 @@ """markdown-it default options.""" +from ..utils import PresetType -def make(): +def make() -> PresetType: return { "options": { "maxNesting": 100, # Internal protection, recursion limit diff --git a/markdown_it/presets/zero.py b/markdown_it/presets/zero.py index af1d9c7f..fcc5eb3a 100644 --- a/markdown_it/presets/zero.py +++ b/markdown_it/presets/zero.py @@ -2,9 +2,10 @@ "Zero" preset, with nothing enabled. Useful for manual configuring of simple modes. For example, to parse bold/italic only. 
""" +from ..utils import PresetType -def make(): +def make() -> PresetType: return { "options": { "maxNesting": 20, # Internal protection, recursion limit diff --git a/markdown_it/renderer.py b/markdown_it/renderer.py index 2d784826..4cddbc67 100644 --- a/markdown_it/renderer.py +++ b/markdown_it/renderer.py @@ -7,20 +7,20 @@ class Renderer """ from __future__ import annotations -from collections.abc import MutableMapping, Sequence +from collections.abc import Sequence import inspect from typing import Any, ClassVar, Protocol from .common.utils import escapeHtml, unescapeAll from .token import Token -from .utils import OptionsDict +from .utils import EnvType, OptionsDict class RendererProtocol(Protocol): __output__: ClassVar[str] def render( - self, tokens: Sequence[Token], options: OptionsDict, env: MutableMapping + self, tokens: Sequence[Token], options: OptionsDict, env: EnvType ) -> Any: ... @@ -57,7 +57,7 @@ def strong_close(self, tokens, idx, options, env): __output__ = "html" - def __init__(self, parser=None): + def __init__(self, parser: Any = None): self.rules = { k: v for k, v in inspect.getmembers(self, predicate=inspect.ismethod) @@ -65,7 +65,7 @@ def __init__(self, parser=None): } def render( - self, tokens: Sequence[Token], options: OptionsDict, env: MutableMapping + self, tokens: Sequence[Token], options: OptionsDict, env: EnvType ) -> str: """Takes token stream and generates HTML. @@ -88,7 +88,7 @@ def render( return result def renderInline( - self, tokens: Sequence[Token], options: OptionsDict, env: MutableMapping + self, tokens: Sequence[Token], options: OptionsDict, env: EnvType ) -> str: """The same as ``render``, but for single token of `inline` type. @@ -111,7 +111,7 @@ def renderToken( tokens: Sequence[Token], idx: int, options: OptionsDict, - env: MutableMapping, + env: EnvType, ) -> str: """Default token renderer. 
@@ -184,7 +184,7 @@ def renderInlineAsText( self, tokens: Sequence[Token] | None, options: OptionsDict, - env: MutableMapping, + env: EnvType, ) -> str: """Special kludge for image `alt` attributes to conform CommonMark spec. @@ -210,7 +210,9 @@ def renderInlineAsText( ################################################### - def code_inline(self, tokens: Sequence[Token], idx: int, options, env) -> str: + def code_inline( + self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType + ) -> str: token = tokens[idx] return ( " str: token = tokens[idx] @@ -242,7 +244,7 @@ def fence( tokens: Sequence[Token], idx: int, options: OptionsDict, - env: MutableMapping, + env: EnvType, ) -> str: token = tokens[idx] info = unescapeAll(token.info).strip() if token.info else "" @@ -294,7 +296,7 @@ def image( tokens: Sequence[Token], idx: int, options: OptionsDict, - env: MutableMapping, + env: EnvType, ) -> str: token = tokens[idx] @@ -308,22 +310,28 @@ def image( return self.renderToken(tokens, idx, options, env) def hardbreak( - self, tokens: Sequence[Token], idx: int, options: OptionsDict, *args + self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType ) -> str: return "
\n" if options.xhtmlOut else "
\n" def softbreak( - self, tokens: Sequence[Token], idx: int, options: OptionsDict, *args + self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType ) -> str: return ( ("
\n" if options.xhtmlOut else "
\n") if options.breaks else "\n" ) - def text(self, tokens: Sequence[Token], idx: int, *args) -> str: + def text( + self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType + ) -> str: return escapeHtml(tokens[idx].content) - def html_block(self, tokens: Sequence[Token], idx: int, *args) -> str: + def html_block( + self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType + ) -> str: return tokens[idx].content - def html_inline(self, tokens: Sequence[Token], idx: int, *args) -> str: + def html_inline( + self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType + ) -> str: return tokens[idx].content diff --git a/markdown_it/ruler.py b/markdown_it/ruler.py index 11b937a0..421666cc 100644 --- a/markdown_it/ruler.py +++ b/markdown_it/ruler.py @@ -17,12 +17,14 @@ class Ruler """ from __future__ import annotations -from collections.abc import Callable, Iterable, MutableMapping +from collections.abc import Callable, Iterable from dataclasses import dataclass, field -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, TypedDict from markdown_it._compat import DATACLASS_KWARGS +from .utils import EnvType + if TYPE_CHECKING: from markdown_it import MarkdownIt @@ -30,7 +32,7 @@ class Ruler class StateBase: srcCharCode: tuple[int, ...] - def __init__(self, src: str, md: MarkdownIt, env: MutableMapping): + def __init__(self, src: str, md: MarkdownIt, env: EnvType): self.src = src self.env = env self.md = md @@ -49,7 +51,11 @@ def src(self, value: str) -> None: # arguments may or may not exist, based on the rule's type (block, # core, inline). Return type is either `None` or `bool` based on the # rule's type. -RuleFunc = Callable +RuleFunc = Callable # type: ignore + + +class RuleOptionsType(TypedDict, total=False): + alt: list[str] @dataclass(**DATACLASS_KWARGS) @@ -61,7 +67,7 @@ class Rule: class Ruler: - def __init__(self): + def __init__(self) -> None: # List of added rules. 
self.__rules__: list[Rule] = [] # Cached rule chains. @@ -95,7 +101,9 @@ def __compile__(self) -> None: continue self.__cache__[chain].append(rule.fn) - def at(self, ruleName: str, fn: RuleFunc, options=None): + def at( + self, ruleName: str, fn: RuleFunc, options: RuleOptionsType | None = None + ) -> None: """Replace rule by name with new function & options. :param ruleName: rule name to replace. @@ -111,7 +119,13 @@ def at(self, ruleName: str, fn: RuleFunc, options=None): self.__rules__[index].alt = options.get("alt", []) self.__cache__ = None - def before(self, beforeName: str, ruleName: str, fn: RuleFunc, options=None): + def before( + self, + beforeName: str, + ruleName: str, + fn: RuleFunc, + options: RuleOptionsType | None = None, + ) -> None: """Add new rule to chain before one with given name. :param beforeName: new rule will be added before this one. @@ -127,7 +141,13 @@ def before(self, beforeName: str, ruleName: str, fn: RuleFunc, options=None): self.__rules__.insert(index, Rule(ruleName, True, fn, options.get("alt", []))) self.__cache__ = None - def after(self, afterName: str, ruleName: str, fn: RuleFunc, options=None): + def after( + self, + afterName: str, + ruleName: str, + fn: RuleFunc, + options: RuleOptionsType | None = None, + ) -> None: """Add new rule to chain after one with given name. :param afterName: new rule will be added after this one. @@ -145,7 +165,9 @@ def after(self, afterName: str, ruleName: str, fn: RuleFunc, options=None): ) self.__cache__ = None - def push(self, ruleName: str, fn: RuleFunc, options=None): + def push( + self, ruleName: str, fn: RuleFunc, options: RuleOptionsType | None = None + ) -> None: """Push new rule to the end of chain. :param ruleName: new rule will be added to the end of chain. 
@@ -156,7 +178,9 @@ def push(self, ruleName: str, fn: RuleFunc, options=None): self.__rules__.append(Rule(ruleName, True, fn, (options or {}).get("alt", []))) self.__cache__ = None - def enable(self, names: str | Iterable[str], ignoreInvalid: bool = False): + def enable( + self, names: str | Iterable[str], ignoreInvalid: bool = False + ) -> list[str]: """Enable rules with given names. :param names: name or list of rule names to enable. @@ -166,7 +190,7 @@ def enable(self, names: str | Iterable[str], ignoreInvalid: bool = False): """ if isinstance(names, str): names = [names] - result = [] + result: list[str] = [] for name in names: idx = self.__find__(name) if (idx < 0) and ignoreInvalid: @@ -178,7 +202,9 @@ def enable(self, names: str | Iterable[str], ignoreInvalid: bool = False): self.__cache__ = None return result - def enableOnly(self, names: str | Iterable[str], ignoreInvalid: bool = False): + def enableOnly( + self, names: str | Iterable[str], ignoreInvalid: bool = False + ) -> list[str]: """Enable rules with given names, and disable everything else. :param names: name or list of rule names to enable. @@ -190,9 +216,11 @@ def enableOnly(self, names: str | Iterable[str], ignoreInvalid: bool = False): names = [names] for rule in self.__rules__: rule.enabled = False - self.enable(names, ignoreInvalid) + return self.enable(names, ignoreInvalid) - def disable(self, names: str | Iterable[str], ignoreInvalid: bool = False): + def disable( + self, names: str | Iterable[str], ignoreInvalid: bool = False + ) -> list[str]: """Disable rules with given names. :param names: name or list of rule names to enable. 
diff --git a/markdown_it/rules_block/blockquote.py b/markdown_it/rules_block/blockquote.py index 965a9e73..3ca0321c 100644 --- a/markdown_it/rules_block/blockquote.py +++ b/markdown_it/rules_block/blockquote.py @@ -9,7 +9,7 @@ LOGGER = logging.getLogger(__name__) -def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool): +def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool: LOGGER.debug( "entering blockquote: %s, %s, %s, %s", state, startLine, endLine, silent ) diff --git a/markdown_it/rules_block/code.py b/markdown_it/rules_block/code.py index a796608d..69bd6bdc 100644 --- a/markdown_it/rules_block/code.py +++ b/markdown_it/rules_block/code.py @@ -6,7 +6,7 @@ LOGGER = logging.getLogger(__name__) -def code(state: StateBlock, startLine: int, endLine: int, silent: bool = False): +def code(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool: LOGGER.debug("entering code: %s, %s, %s, %s", state, startLine, endLine, silent) if state.sCount[startLine] - state.blkIndent < 4: diff --git a/markdown_it/rules_block/fence.py b/markdown_it/rules_block/fence.py index 53bc6f2d..2bdd95f8 100644 --- a/markdown_it/rules_block/fence.py +++ b/markdown_it/rules_block/fence.py @@ -6,7 +6,7 @@ LOGGER = logging.getLogger(__name__) -def fence(state: StateBlock, startLine: int, endLine: int, silent: bool): +def fence(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool: LOGGER.debug("entering fence: %s, %s, %s, %s", state, startLine, endLine, silent) haveEndMarker = False diff --git a/markdown_it/rules_block/heading.py b/markdown_it/rules_block/heading.py index 064d0702..564e1726 100644 --- a/markdown_it/rules_block/heading.py +++ b/markdown_it/rules_block/heading.py @@ -9,7 +9,7 @@ LOGGER = logging.getLogger(__name__) -def heading(state: StateBlock, startLine: int, endLine: int, silent: bool): +def heading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool: LOGGER.debug("entering 
heading: %s, %s, %s, %s", state, startLine, endLine, silent) pos = state.bMarks[startLine] + state.tShift[startLine] diff --git a/markdown_it/rules_block/hr.py b/markdown_it/rules_block/hr.py index 953bba23..72ea010d 100644 --- a/markdown_it/rules_block/hr.py +++ b/markdown_it/rules_block/hr.py @@ -10,7 +10,7 @@ LOGGER = logging.getLogger(__name__) -def hr(state: StateBlock, startLine: int, endLine: int, silent: bool): +def hr(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool: LOGGER.debug("entering hr: %s, %s, %s, %s", state, startLine, endLine, silent) pos = state.bMarks[startLine] + state.tShift[startLine] diff --git a/markdown_it/rules_block/html_block.py b/markdown_it/rules_block/html_block.py index 31afab76..4831f562 100644 --- a/markdown_it/rules_block/html_block.py +++ b/markdown_it/rules_block/html_block.py @@ -12,7 +12,7 @@ # An array of opening and corresponding closing sequences for html tags, # last argument defines whether it can terminate a paragraph or not -HTML_SEQUENCES: list[tuple[re.Pattern, re.Pattern, bool]] = [ +HTML_SEQUENCES: list[tuple[re.Pattern[str], re.Pattern[str], bool]] = [ ( re.compile(r"^<(script|pre|style|textarea)(?=(\s|>|$))", re.IGNORECASE), re.compile(r"<\/(script|pre|style|textarea)>", re.IGNORECASE), @@ -31,7 +31,7 @@ ] -def html_block(state: StateBlock, startLine: int, endLine: int, silent: bool): +def html_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool: LOGGER.debug( "entering html_block: %s, %s, %s, %s", state, startLine, endLine, silent ) diff --git a/markdown_it/rules_block/lheading.py b/markdown_it/rules_block/lheading.py index 92632acc..a3806f8e 100644 --- a/markdown_it/rules_block/lheading.py +++ b/markdown_it/rules_block/lheading.py @@ -7,7 +7,7 @@ LOGGER = logging.getLogger(__name__) -def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool): +def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool: 
LOGGER.debug("entering lheading: %s, %s, %s, %s", state, startLine, endLine, silent) level = None diff --git a/markdown_it/rules_block/list.py b/markdown_it/rules_block/list.py index d9c5e554..1592b599 100644 --- a/markdown_it/rules_block/list.py +++ b/markdown_it/rules_block/list.py @@ -9,7 +9,7 @@ # Search `[-+*][\n ]`, returns next pos after marker on success # or -1 on fail. -def skipBulletListMarker(state: StateBlock, startLine: int): +def skipBulletListMarker(state: StateBlock, startLine: int) -> int: pos = state.bMarks[startLine] + state.tShift[startLine] maximum = state.eMarks[startLine] @@ -34,7 +34,7 @@ def skipBulletListMarker(state: StateBlock, startLine: int): # Search `\d+[.)][\n ]`, returns next pos after marker on success # or -1 on fail. -def skipOrderedListMarker(state: StateBlock, startLine: int): +def skipOrderedListMarker(state: StateBlock, startLine: int) -> int: start = state.bMarks[startLine] + state.tShift[startLine] pos = start maximum = state.eMarks[startLine] @@ -83,7 +83,7 @@ def skipOrderedListMarker(state: StateBlock, startLine: int): return pos -def markTightParagraphs(state: StateBlock, idx: int): +def markTightParagraphs(state: StateBlock, idx: int) -> None: level = state.level + 2 i = idx + 2 @@ -96,7 +96,7 @@ def markTightParagraphs(state: StateBlock, idx: int): i += 1 -def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool): +def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool: LOGGER.debug("entering list: %s, %s, %s, %s", state, startLine, endLine, silent) isTerminatingParagraph = False diff --git a/markdown_it/rules_block/paragraph.py b/markdown_it/rules_block/paragraph.py index fef7edf7..3c7d43d3 100644 --- a/markdown_it/rules_block/paragraph.py +++ b/markdown_it/rules_block/paragraph.py @@ -7,7 +7,7 @@ LOGGER = logging.getLogger(__name__) -def paragraph(state: StateBlock, startLine: int, endLine: int, silent: bool = False): +def paragraph(state: StateBlock, startLine: 
int, endLine: int, silent: bool) -> bool: LOGGER.debug( "entering paragraph: %s, %s, %s, %s", state, startLine, endLine, silent ) diff --git a/markdown_it/rules_block/reference.py b/markdown_it/rules_block/reference.py index 39e21eb6..5689064b 100644 --- a/markdown_it/rules_block/reference.py +++ b/markdown_it/rules_block/reference.py @@ -6,7 +6,7 @@ LOGGER = logging.getLogger(__name__) -def reference(state: StateBlock, startLine, _endLine, silent): +def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) -> bool: LOGGER.debug( "entering reference: %s, %s, %s, %s", state, startLine, _endLine, silent ) diff --git a/markdown_it/rules_block/state_block.py b/markdown_it/rules_block/state_block.py index c5589149..7ddf806c 100644 --- a/markdown_it/rules_block/state_block.py +++ b/markdown_it/rules_block/state_block.py @@ -1,10 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Literal from ..common.utils import isSpace from ..ruler import StateBase from ..token import Token +from ..utils import EnvType if TYPE_CHECKING: from markdown_it.main import MarkdownIt @@ -15,7 +16,7 @@ def __init__( self, src: str, md: MarkdownIt, - env, + env: EnvType, tokens: list[Token], srcCharCode: tuple[int, ...] | None = None, ): @@ -36,11 +37,11 @@ def __init__( self.tokens = tokens - self.bMarks = [] # line begin offsets for fast jumps - self.eMarks = [] # line end offsets for fast jumps + self.bMarks: list[int] = [] # line begin offsets for fast jumps + self.eMarks: list[int] = [] # line end offsets for fast jumps # offsets of the first non-space characters (tabs not expanded) - self.tShift = [] - self.sCount = [] # indents for each line (tabs expanded) + self.tShift: list[int] = [] + self.sCount: list[int] = [] # indents for each line (tabs expanded) # An amount of virtual spaces (tabs expanded) between beginning # of each line (bMarks) and real beginning of that line. 
@@ -52,7 +53,7 @@ def __init__( # an initial tab length, e.g. bsCount=21 applied to string `\t123` # means first tab should be expanded to 4-21%4 === 3 spaces. # - self.bsCount = [] + self.bsCount: list[int] = [] # block parser variables self.blkIndent = 0 # required block content indent (for example, if we are @@ -115,13 +116,13 @@ def __init__( self.lineMax = len(self.bMarks) - 1 # don't count last fake line - def __repr__(self): + def __repr__(self) -> str: return ( f"{self.__class__.__name__}" f"(line={self.line},level={self.level},tokens={len(self.tokens)})" ) - def push(self, ttype: str, tag: str, nesting: int) -> Token: + def push(self, ttype: str, tag: str, nesting: Literal[-1, 0, 1]) -> Token: """Push new token to "stream".""" token = Token(ttype, tag, nesting) token.block = True diff --git a/markdown_it/rules_block/table.py b/markdown_it/rules_block/table.py index e3db8584..c432d44f 100644 --- a/markdown_it/rules_block/table.py +++ b/markdown_it/rules_block/table.py @@ -1,4 +1,6 @@ # GFM table, https://github.github.com/gfm/#tables-extension- +from __future__ import annotations + import re from ..common.utils import charCodeAt, isSpace @@ -8,7 +10,7 @@ enclosingPipesRe = re.compile(r"^\||\|$") -def getLine(state: StateBlock, line: int): +def getLine(state: StateBlock, line: int) -> str: pos = state.bMarks[line] + state.tShift[line] maximum = state.eMarks[line] @@ -16,8 +18,8 @@ def getLine(state: StateBlock, line: int): return state.src[pos:maximum] -def escapedSplit(string): - result = [] +def escapedSplit(string: str) -> list[str]: + result: list[str] = [] pos = 0 max = len(string) isEscaped = False @@ -47,7 +49,7 @@ def escapedSplit(string): return result -def table(state: StateBlock, startLine: int, endLine: int, silent: bool): +def table(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool: tbodyLines = None # should have at least two lines diff --git a/markdown_it/rules_core/replacements.py b/markdown_it/rules_core/replacements.py 
index 5e9b7ae7..e5d81c7a 100644 --- a/markdown_it/rules_core/replacements.py +++ b/markdown_it/rules_core/replacements.py @@ -56,7 +56,7 @@ SCOPED_ABBR = {"c": "©", "r": "®", "p": "§", "tm": "™"} -def replaceFn(match: re.Match[str]): +def replaceFn(match: re.Match[str]) -> str: return SCOPED_ABBR[match.group(1).lower()] diff --git a/markdown_it/rules_core/state_core.py b/markdown_it/rules_core/state_core.py index 15b7c605..a938041d 100644 --- a/markdown_it/rules_core/state_core.py +++ b/markdown_it/rules_core/state_core.py @@ -1,10 +1,10 @@ from __future__ import annotations -from collections.abc import MutableMapping from typing import TYPE_CHECKING from ..ruler import StateBase from ..token import Token +from ..utils import EnvType if TYPE_CHECKING: from markdown_it import MarkdownIt @@ -15,9 +15,9 @@ def __init__( self, src: str, md: MarkdownIt, - env: MutableMapping, + env: EnvType, tokens: list[Token] | None = None, - ): + ) -> None: self.src = src self.md = md # link to parser instance self.env = env diff --git a/markdown_it/rules_inline/balance_pairs.py b/markdown_it/rules_inline/balance_pairs.py index 5423b5d6..ce0a0884 100644 --- a/markdown_it/rules_inline/balance_pairs.py +++ b/markdown_it/rules_inline/balance_pairs.py @@ -1,9 +1,11 @@ -# For each opening emphasis-like marker find a matching closing one -# -from .state_inline import StateInline +"""Balance paired characters (*, _, etc) in inline tokens.""" +from __future__ import annotations +from .state_inline import Delimiter, StateInline -def processDelimiters(state: StateInline, delimiters, *args): + +def processDelimiters(state: StateInline, delimiters: list[Delimiter]) -> None: + """For each opening emphasis-like marker find a matching closing one.""" openersBottom = {} maximum = len(delimiters) diff --git a/markdown_it/rules_inline/emphasis.py b/markdown_it/rules_inline/emphasis.py index 5262430b..d21b494c 100644 --- a/markdown_it/rules_inline/emphasis.py +++ b/markdown_it/rules_inline/emphasis.py 
@@ -1,10 +1,11 @@ # Process *this* and _that_ # +from __future__ import annotations from .state_inline import Delimiter, StateInline -def tokenize(state: StateInline, silent: bool): +def tokenize(state: StateInline, silent: bool) -> bool: """Insert each marker as a separate text token, and add it to delimiter list""" start = state.pos marker = state.srcCharCode[start] @@ -38,7 +39,7 @@ def tokenize(state: StateInline, silent: bool): return True -def _postProcess(state, delimiters): +def _postProcess(state: StateInline, delimiters: list[Delimiter]) -> None: i = len(delimiters) - 1 while i >= 0: startDelim = delimiters[i] @@ -92,7 +93,7 @@ def _postProcess(state, delimiters): i -= 1 -def postProcess(state: StateInline): +def postProcess(state: StateInline) -> None: """Walk through delimiter list and replace text tokens with tags.""" _postProcess(state, state.delimiters) diff --git a/markdown_it/rules_inline/entity.py b/markdown_it/rules_inline/entity.py index 08d271ed..9c4c6a0e 100644 --- a/markdown_it/rules_inline/entity.py +++ b/markdown_it/rules_inline/entity.py @@ -9,7 +9,7 @@ NAMED_RE = re.compile(r"^&([a-z][a-z0-9]{1,31});", re.IGNORECASE) -def entity(state: StateInline, silent: bool): +def entity(state: StateInline, silent: bool) -> bool: pos = state.pos maximum = state.posMax diff --git a/markdown_it/rules_inline/escape.py b/markdown_it/rules_inline/escape.py index 36bd0402..1767e01d 100644 --- a/markdown_it/rules_inline/escape.py +++ b/markdown_it/rules_inline/escape.py @@ -9,7 +9,7 @@ ESCAPED[ord(ch)] = 1 -def escape(state: StateInline, silent: bool): +def escape(state: StateInline, silent: bool) -> bool: pos = state.pos maximum = state.posMax diff --git a/markdown_it/rules_inline/html_inline.py b/markdown_it/rules_inline/html_inline.py index b875e884..6a636684 100644 --- a/markdown_it/rules_inline/html_inline.py +++ b/markdown_it/rules_inline/html_inline.py @@ -3,13 +3,13 @@ from .state_inline import StateInline -def isLetter(ch: int): +def isLetter(ch: 
int) -> bool: lc = ch | 0x20 # to lower case # /* a */ and /* z */ return (lc >= 0x61) and (lc <= 0x7A) -def html_inline(state: StateInline, silent: bool): +def html_inline(state: StateInline, silent: bool) -> bool: pos = state.pos if not state.md.options.get("html", None): diff --git a/markdown_it/rules_inline/image.py b/markdown_it/rules_inline/image.py index d7215bdf..0cb14ffd 100644 --- a/markdown_it/rules_inline/image.py +++ b/markdown_it/rules_inline/image.py @@ -6,7 +6,7 @@ from .state_inline import StateInline -def image(state: StateInline, silent: bool): +def image(state: StateInline, silent: bool) -> bool: label = None href = "" oldPos = state.pos diff --git a/markdown_it/rules_inline/link.py b/markdown_it/rules_inline/link.py index a6345152..c4548ccd 100644 --- a/markdown_it/rules_inline/link.py +++ b/markdown_it/rules_inline/link.py @@ -4,7 +4,7 @@ from .state_inline import StateInline -def link(state: StateInline, silent: bool): +def link(state: StateInline, silent: bool) -> bool: href = "" title = "" label = None diff --git a/markdown_it/rules_inline/newline.py b/markdown_it/rules_inline/newline.py index 3034e408..4c440579 100644 --- a/markdown_it/rules_inline/newline.py +++ b/markdown_it/rules_inline/newline.py @@ -7,7 +7,7 @@ endSpace = re.compile(r" +$") -def newline(state: StateInline, silent: bool): +def newline(state: StateInline, silent: bool) -> bool: pos = state.pos # /* \n */ diff --git a/markdown_it/rules_inline/state_inline.py b/markdown_it/rules_inline/state_inline.py index 283532cc..7c1cb1f3 100644 --- a/markdown_it/rules_inline/state_inline.py +++ b/markdown_it/rules_inline/state_inline.py @@ -1,14 +1,14 @@ from __future__ import annotations from collections import namedtuple -from collections.abc import MutableMapping from dataclasses import dataclass -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Literal from .._compat import DATACLASS_KWARGS from ..common.utils import isMdAsciiPunct, isPunctChar, 
isWhiteSpace from ..ruler import StateBase from ..token import Token +from ..utils import EnvType if TYPE_CHECKING: from markdown_it import MarkdownIt @@ -50,13 +50,13 @@ class Delimiter: class StateInline(StateBase): def __init__( - self, src: str, md: MarkdownIt, env: MutableMapping, outTokens: list[Token] - ): + self, src: str, md: MarkdownIt, env: EnvType, outTokens: list[Token] + ) -> None: self.src = src self.env = env self.md = md self.tokens = outTokens - self.tokens_meta: list[dict | None] = [None] * len(outTokens) + self.tokens_meta: list[dict[str, Any] | None] = [None] * len(outTokens) self.pos = 0 self.posMax = len(self.src) @@ -78,13 +78,13 @@ def __init__( self.backticks: dict[int, int] = {} self.backticksScanned = False - def __repr__(self): + def __repr__(self) -> str: return ( f"{self.__class__.__name__}" f"(pos=[{self.pos} of {self.posMax}], token={len(self.tokens)})" ) - def pushPending(self): + def pushPending(self) -> Token: token = Token("text", "", 0) token.content = self.pending token.level = self.pendingLevel @@ -92,7 +92,7 @@ def pushPending(self): self.pending = "" return token - def push(self, ttype, tag, nesting): + def push(self, ttype: str, tag: str, nesting: Literal[-1, 0, 1]) -> Token: """Push new token to "stream". If pending text exists - flush it as text token """ @@ -121,7 +121,7 @@ def push(self, ttype, tag, nesting): self.tokens_meta.append(token_meta) return token - def scanDelims(self, start, canSplitWord): + def scanDelims(self, start: int, canSplitWord: bool) -> Scanned: """ Scan a sequence of emphasis-like markers, and determine whether it can start an emphasis sequence or end an emphasis sequence. 
diff --git a/markdown_it/rules_inline/strikethrough.py b/markdown_it/rules_inline/strikethrough.py index 9b062a66..8b080816 100644 --- a/markdown_it/rules_inline/strikethrough.py +++ b/markdown_it/rules_inline/strikethrough.py @@ -4,7 +4,7 @@ from .state_inline import Delimiter, StateInline -def tokenize(state: StateInline, silent: bool): +def tokenize(state: StateInline, silent: bool) -> bool: """Insert each marker as a separate text token, and add it to delimiter list""" start = state.pos marker = state.srcCharCode[start] @@ -52,7 +52,7 @@ def tokenize(state: StateInline, silent: bool): return True -def _postProcess(state: StateInline, delimiters: list[Delimiter]): +def _postProcess(state: StateInline, delimiters: list[Delimiter]) -> None: loneMarkers = [] maximum = len(delimiters) @@ -113,7 +113,7 @@ def _postProcess(state: StateInline, delimiters: list[Delimiter]): state.tokens[i] = token -def postProcess(state: StateInline): +def postProcess(state: StateInline) -> None: """Walk through delimiter list and replace text tokens with tags.""" tokens_meta = state.tokens_meta maximum = len(state.tokens_meta) diff --git a/markdown_it/rules_inline/text.py b/markdown_it/rules_inline/text.py index ec6ee0fa..bdf55310 100644 --- a/markdown_it/rules_inline/text.py +++ b/markdown_it/rules_inline/text.py @@ -1,5 +1,6 @@ # Skip text characters for text token, place those to pending buffer # and increment current pos +from typing import Any from .state_inline import StateInline @@ -12,7 +13,7 @@ # http://spec.commonmark.org/0.15/#ascii-punctuation-character -def isTerminatorChar(ch): +def isTerminatorChar(ch: int) -> bool: return ch in { 0x0A, # /* \n */: 0x21, # /* ! 
*/: @@ -40,7 +41,7 @@ def isTerminatorChar(ch): } -def text(state: StateInline, silent: bool, **args): +def text(state: StateInline, silent: bool, **args: Any) -> bool: pos = state.pos posMax = state.posMax while (pos < posMax) and not isTerminatorChar(state.srcCharCode[pos]): diff --git a/markdown_it/rules_inline/text_collapse.py b/markdown_it/rules_inline/text_collapse.py index 6d0c0ab6..e09289cf 100644 --- a/markdown_it/rules_inline/text_collapse.py +++ b/markdown_it/rules_inline/text_collapse.py @@ -1,7 +1,7 @@ from .state_inline import StateInline -def text_collapse(state: StateInline, *args): +def text_collapse(state: StateInline) -> None: """ Clean up tokens after emphasis and strikethrough postprocessing: merge adjacent text nodes into one and re-calculate all token levels diff --git a/markdown_it/token.py b/markdown_it/token.py index 7a41a784..e3f6c9b9 100644 --- a/markdown_it/token.py +++ b/markdown_it/token.py @@ -2,7 +2,7 @@ from collections.abc import Callable, MutableMapping import dataclasses as dc -from typing import Any +from typing import Any, Literal import warnings from markdown_it._compat import DATACLASS_KWARGS @@ -28,7 +28,7 @@ class Token: tag: str """HTML tag name, e.g. 'p'""" - nesting: int + nesting: Literal[-1, 0, 1] """Level change (number in {-1, 0, 1} set), where: - `1` means the tag is opening - `0` means the tag is self-closing @@ -63,7 +63,7 @@ class Token: - The string value of the item marker for ordered-list "list_item_open" tokens """ - meta: dict = dc.field(default_factory=dict) + meta: dict[Any, Any] = dc.field(default_factory=dict) """A place for plugins to store any arbitrary data""" block: bool = False @@ -76,7 +76,7 @@ class Token: Used for tight lists to hide paragraphs. 
""" - def __post_init__(self): + def __post_init__(self) -> None: self.attrs = convert_attrs(self.attrs) def attrIndex(self, name: str) -> int: @@ -129,7 +129,7 @@ def as_dict( *, children: bool = True, as_upstream: bool = True, - meta_serializer: Callable[[dict], Any] | None = None, + meta_serializer: Callable[[dict[Any, Any]], Any] | None = None, filter: Callable[[str, Any], bool] | None = None, dict_factory: Callable[..., MutableMapping[str, Any]] = dict, ) -> MutableMapping[str, Any]: diff --git a/markdown_it/tree.py b/markdown_it/tree.py index 09476b22..a39ba32a 100644 --- a/markdown_it/tree.py +++ b/markdown_it/tree.py @@ -9,7 +9,6 @@ from typing import Any, NamedTuple, TypeVar, overload from .token import Token -from .utils import _removesuffix class _NesterTokens(NamedTuple): @@ -51,7 +50,7 @@ def __init__( # Empty list unless a non-empty container, or unnested token that has # children (i.e. inline or img) - self._children: list = [] + self._children: list[Any] = [] if create_root: self._set_children_from_tokens(tokens) @@ -119,7 +118,7 @@ def children(self: _NodeType, value: list[_NodeType]) -> None: @property def parent(self: _NodeType) -> _NodeType | None: - return self._parent + return self._parent # type: ignore @parent.setter def parent(self: _NodeType, value: _NodeType | None) -> None: @@ -314,7 +313,7 @@ def info(self) -> str: return self._attribute_token().info @property - def meta(self) -> dict: + def meta(self) -> dict[Any, Any]: """A place for plugins to store an arbitrary data.""" return self._attribute_token().meta @@ -328,3 +327,14 @@ def hidden(self) -> bool: """If it's true, ignore this element when rendering. Used for tight lists to hide paragraphs.""" return self._attribute_token().hidden + + +def _removesuffix(string: str, suffix: str) -> str: + """Remove a suffix from a string. + + Replace this with str.removesuffix() from stdlib when minimum Python + version is 3.9. 
+ """ + if suffix and string.endswith(suffix): + return string[: -len(suffix)] + return string diff --git a/markdown_it/utils.py b/markdown_it/utils.py index 2ba2995a..a9793720 100644 --- a/markdown_it/utils.py +++ b/markdown_it/utils.py @@ -1,95 +1,160 @@ from __future__ import annotations -from collections.abc import Callable +from collections.abc import MutableMapping as MutableMappingABC from pathlib import Path +from typing import Any, Callable, Iterable, MutableMapping, TypedDict, cast + +EnvType = MutableMapping[str, Any] # note: could use TypeAlias in python 3.10 +"""Type for the environment sandbox used in parsing and rendering, +which stores mutable variables for use by plugins and rules. +""" + + +class OptionsType(TypedDict): + """Options for parsing.""" + + maxNesting: int + """Internal protection, recursion limit.""" + html: bool + """Enable HTML tags in source.""" + linkify: bool + """Enable autoconversion of URL-like texts to links.""" + typographer: bool + """Enable smartquotes and replacements.""" + quotes: str + """Quote characters.""" + xhtmlOut: bool + """Use '/' to close single tags (
).""" + breaks: bool + """Convert newlines in paragraphs into
.""" + langPrefix: str + """CSS language prefix for fenced blocks.""" + highlight: Callable[[str, str, str], str] | None + """Highlighter function: (content, lang, attrs) -> str.""" + + +class PresetType(TypedDict): + """Preset configuration for markdown-it.""" + + options: OptionsType + """Options for parsing.""" + components: MutableMapping[str, MutableMapping[str, list[str]]] + """Components for parsing and rendering.""" + + +class OptionsDict(MutableMappingABC): # type: ignore + """A dictionary, with attribute access to core markdownit configuration options.""" + # Note: ideally we would probably just remove attribute access entirely, + # but we keep it for backwards compatibility. -class OptionsDict(dict): - """A dictionary, with attribute access to core markdownit configuration options.""" + def __init__(self, options: OptionsType) -> None: + self._options = cast(OptionsType, dict(options)) + + def __getitem__(self, key: str) -> Any: + return self._options[key] # type: ignore[literal-required] + + def __setitem__(self, key: str, value: Any) -> None: + self._options[key] = value # type: ignore[literal-required] + + def __delitem__(self, key: str) -> None: + del self._options[key] # type: ignore + + def __iter__(self) -> Iterable[str]: # type: ignore + return iter(self._options) + + def __len__(self) -> int: + return len(self._options) + + def __repr__(self) -> str: + return repr(self._options) + + def __str__(self) -> str: + return str(self._options) @property def maxNesting(self) -> int: """Internal protection, recursion limit.""" - return self["maxNesting"] + return self._options["maxNesting"] @maxNesting.setter - def maxNesting(self, value: int): - self["maxNesting"] = value + def maxNesting(self, value: int) -> None: + self._options["maxNesting"] = value @property def html(self) -> bool: """Enable HTML tags in source.""" - return self["html"] + return self._options["html"] @html.setter - def html(self, value: bool): - self["html"] = value + def html(self, 
value: bool) -> None: + self._options["html"] = value @property def linkify(self) -> bool: """Enable autoconversion of URL-like texts to links.""" - return self["linkify"] + return self._options["linkify"] @linkify.setter - def linkify(self, value: bool): - self["linkify"] = value + def linkify(self, value: bool) -> None: + self._options["linkify"] = value @property def typographer(self) -> bool: """Enable smartquotes and replacements.""" - return self["typographer"] + return self._options["typographer"] @typographer.setter - def typographer(self, value: bool): - self["typographer"] = value + def typographer(self, value: bool) -> None: + self._options["typographer"] = value @property def quotes(self) -> str: """Quote characters.""" - return self["quotes"] + return self._options["quotes"] @quotes.setter - def quotes(self, value: str): - self["quotes"] = value + def quotes(self, value: str) -> None: + self._options["quotes"] = value @property def xhtmlOut(self) -> bool: """Use '/' to close single tags (
).""" - return self["xhtmlOut"] + return self._options["xhtmlOut"] @xhtmlOut.setter - def xhtmlOut(self, value: bool): - self["xhtmlOut"] = value + def xhtmlOut(self, value: bool) -> None: + self._options["xhtmlOut"] = value @property def breaks(self) -> bool: """Convert newlines in paragraphs into
.""" - return self["breaks"] + return self._options["breaks"] @breaks.setter - def breaks(self, value: bool): - self["breaks"] = value + def breaks(self, value: bool) -> None: + self._options["breaks"] = value @property def langPrefix(self) -> str: """CSS language prefix for fenced blocks.""" - return self["langPrefix"] + return self._options["langPrefix"] @langPrefix.setter - def langPrefix(self, value: str): - self["langPrefix"] = value + def langPrefix(self, value: str) -> None: + self._options["langPrefix"] = value @property def highlight(self) -> Callable[[str, str, str], str] | None: """Highlighter function: (content, langName, langAttrs) -> escaped HTML.""" - return self["highlight"] + return self._options["highlight"] @highlight.setter - def highlight(self, value: Callable[[str, str, str], str] | None): - self["highlight"] = value + def highlight(self, value: Callable[[str, str, str], str] | None) -> None: + self._options["highlight"] = value -def read_fixture_file(path: str | Path) -> list[list]: +def read_fixture_file(path: str | Path) -> list[list[Any]]: text = Path(path).read_text(encoding="utf-8") tests = [] section = 0 @@ -109,14 +174,3 @@ def read_fixture_file(path: str | Path) -> list[list]: last_pos = i return tests - - -def _removesuffix(string: str, suffix: str) -> str: - """Remove a suffix from a string. - - Replace this with str.removesuffix() from stdlib when minimum Python - version is 3.9. 
- """ - if suffix and string.endswith(suffix): - return string[: -len(suffix)] - return string diff --git a/pyproject.toml b/pyproject.toml index da8d9170..acf2a288 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -87,9 +87,12 @@ force_sort_within_sections = true show_error_codes = true warn_unused_ignores = true warn_redundant_casts = true -no_implicit_optional = true -strict_equality = true -implicit_reexport = false +strict = true + +[[tool.mypy.overrides]] +module = ["tests.*"] +disallow_untyped_calls = false +disallow_untyped_defs = false [[tool.mypy.overrides]] module = ["tests.test_plugins.*", "markdown.*"] diff --git a/tests/test_api/test_main.py b/tests/test_api/test_main.py index 007259e3..c3a9ac8b 100644 --- a/tests/test_api/test_main.py +++ b/tests/test_api/test_main.py @@ -150,7 +150,7 @@ def test_parseInline(): type="inline", tag="", nesting=0, - attrs=None, + attrs={}, map=[0, 1], level=0, children=[ @@ -158,7 +158,7 @@ def test_parseInline(): type="text", tag="", nesting=0, - attrs=None, + attrs={}, map=None, level=0, children=None, @@ -173,7 +173,7 @@ def test_parseInline(): type="softbreak", tag="br", nesting=0, - attrs=None, + attrs={}, map=None, level=0, children=None, @@ -188,7 +188,7 @@ def test_parseInline(): type="softbreak", tag="br", nesting=0, - attrs=None, + attrs={}, map=None, level=0, children=None, @@ -203,7 +203,7 @@ def test_parseInline(): type="text", tag="", nesting=0, - attrs=None, + attrs={}, map=None, level=0, children=None, @@ -239,7 +239,7 @@ def test_emptyStr(): type="inline", tag="", nesting=0, - attrs=None, + attrs={}, map=[0, 1], level=0, children=[], @@ -257,7 +257,7 @@ def test_empty_env(): """Test that an empty `env` is mutated, not copied and mutated.""" md = MarkdownIt() - env = {} + env = {} # type: ignore md.render("[foo]: /url\n[foo]", env) assert "references" in env diff --git a/tests/test_api/test_token.py b/tests/test_api/test_token.py index e3806b50..44035981 100644 --- a/tests/test_api/test_token.py +++ 
b/tests/test_api/test_token.py @@ -24,7 +24,7 @@ def test_token(): assert token.attrGet("a") == "b" token.attrJoin("a", "c") assert token.attrGet("a") == "b c" - token.attrPush(["x", "y"]) + token.attrPush(("x", "y")) assert token.attrGet("x") == "y" with warnings.catch_warnings(): warnings.simplefilter("ignore") diff --git a/tests/test_linkify.py b/tests/test_linkify.py index 96d506d1..48b1981c 100644 --- a/tests/test_linkify.py +++ b/tests/test_linkify.py @@ -6,6 +6,7 @@ def test_token_levels(): tokens = mdit.parse("www.python.org") inline = tokens[1] assert inline.type == "inline" + assert inline.children link_open = inline.children[0] assert link_open.type == "link_open" link_text = inline.children[1] diff --git a/tests/test_port/test_references.py b/tests/test_port/test_references.py index 75bf7130..97f8a65a 100644 --- a/tests/test_port/test_references.py +++ b/tests/test_port/test_references.py @@ -4,7 +4,7 @@ def test_ref_definitions(): md = MarkdownIt() src = "[a]: abc\n\n[b]: xyz\n\n[b]: ijk" - env = {} + env = {} # type: ignore tokens = md.parse(src, env) assert tokens == [] assert env == { diff --git a/tests/test_tree.py b/tests/test_tree.py index 7a7d605e..c5203b0b 100644 --- a/tests/test_tree.py +++ b/tests/test_tree.py @@ -20,7 +20,7 @@ def test_property_passthrough(): tree = SyntaxTreeNode(tokens) heading_node = tree.children[0] assert heading_open.tag == heading_node.tag - assert tuple(heading_open.map) == heading_node.map + assert tuple(heading_open.map or ()) == heading_node.map assert heading_open.level == heading_node.level assert heading_open.content == heading_node.content assert heading_open.markup == heading_node.markup @@ -49,11 +49,13 @@ def test_sibling_traverse(): text_node = paragraph_inline_node.children[0] assert text_node.type == "text" strong_node = text_node.next_sibling + assert strong_node assert strong_node.type == "strong" another_text_node = strong_node.next_sibling + assert another_text_node assert another_text_node.type == 
"text" assert another_text_node.next_sibling is None - assert another_text_node.previous_sibling.previous_sibling == text_node + assert another_text_node.previous_sibling.previous_sibling == text_node # type: ignore assert text_node.previous_sibling is None From 9251695727cfa948bb18fc76a5dc85495cacc361 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Wed, 31 May 2023 21:18:09 +0200 Subject: [PATCH 11/28] =?UTF-8?q?=F0=9F=91=8C=20Centralise=20indented=20co?= =?UTF-8?q?de=20block=20test=20(#260)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For CommonMark, the presence of indented code blocks prevent any other block element from having an indent of greater than 4 spaces. Certain Markdown flavors and derivatives, such as mdx and djot, disable these code blocks though, since it is more common to use code fences and/or arbitrary indenting is desirable. Currently, disabling code blocks does not remove the indent limitation, since most block elements have the 3 space limitation hard-coded. This commit therefore centralises the logic of applying this limitation, and only applies it when indented code blocks are enabled. Note, this is a potential breaking change and divergence from upstream markdown-it, for this niche case, but I feel makes sense and could even be upstreamed. 
--- markdown_it/rules_block/blockquote.py | 3 +- markdown_it/rules_block/code.py | 4 +- markdown_it/rules_block/fence.py | 6 +- markdown_it/rules_block/heading.py | 3 +- markdown_it/rules_block/hr.py | 3 +- markdown_it/rules_block/html_block.py | 3 +- markdown_it/rules_block/lheading.py | 3 +- markdown_it/rules_block/list.py | 6 +- markdown_it/rules_block/reference.py | 3 +- markdown_it/rules_block/state_block.py | 9 +++ markdown_it/rules_block/table.py | 7 +- .../test_port/fixtures/disable_code_block.md | 69 +++++++++++++++++++ tests/test_port/test_fixtures.py | 11 +++ 13 files changed, 104 insertions(+), 26 deletions(-) create mode 100644 tests/test_port/fixtures/disable_code_block.md diff --git a/markdown_it/rules_block/blockquote.py b/markdown_it/rules_block/blockquote.py index 3ca0321c..da57dfa5 100644 --- a/markdown_it/rules_block/blockquote.py +++ b/markdown_it/rules_block/blockquote.py @@ -18,8 +18,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> pos = state.bMarks[startLine] + state.tShift[startLine] max = state.eMarks[startLine] - # if it's indented more than 3 spaces, it should be a code block - if (state.sCount[startLine] - state.blkIndent) >= 4: + if state.is_code_block(startLine): return False # check the block quote marker diff --git a/markdown_it/rules_block/code.py b/markdown_it/rules_block/code.py index 69bd6bdc..89db9cec 100644 --- a/markdown_it/rules_block/code.py +++ b/markdown_it/rules_block/code.py @@ -9,7 +9,7 @@ def code(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool: LOGGER.debug("entering code: %s, %s, %s, %s", state, startLine, endLine, silent) - if state.sCount[startLine] - state.blkIndent < 4: + if not state.is_code_block(startLine): return False last = nextLine = startLine + 1 @@ -19,7 +19,7 @@ def code(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool: nextLine += 1 continue - if state.sCount[nextLine] - state.blkIndent >= 4: + if 
state.is_code_block(nextLine): nextLine += 1 last = nextLine continue diff --git a/markdown_it/rules_block/fence.py b/markdown_it/rules_block/fence.py index 2bdd95f8..b4b28979 100644 --- a/markdown_it/rules_block/fence.py +++ b/markdown_it/rules_block/fence.py @@ -13,8 +13,7 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool pos = state.bMarks[startLine] + state.tShift[startLine] maximum = state.eMarks[startLine] - # if it's indented more than 3 spaces, it should be a code block - if state.sCount[startLine] - state.blkIndent >= 4: + if state.is_code_block(startLine): return False if pos + 3 > maximum: @@ -72,8 +71,7 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool except IndexError: break - if state.sCount[nextLine] - state.blkIndent >= 4: - # closing fence should be indented less than 4 spaces + if state.is_code_block(nextLine): continue pos = state.skipChars(pos, marker) diff --git a/markdown_it/rules_block/heading.py b/markdown_it/rules_block/heading.py index 564e1726..90847f9d 100644 --- a/markdown_it/rules_block/heading.py +++ b/markdown_it/rules_block/heading.py @@ -15,8 +15,7 @@ def heading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bo pos = state.bMarks[startLine] + state.tShift[startLine] maximum = state.eMarks[startLine] - # if it's indented more than 3 spaces, it should be a code block - if state.sCount[startLine] - state.blkIndent >= 4: + if state.is_code_block(startLine): return False ch: int | None = state.srcCharCode[pos] diff --git a/markdown_it/rules_block/hr.py b/markdown_it/rules_block/hr.py index 72ea010d..6e6b907b 100644 --- a/markdown_it/rules_block/hr.py +++ b/markdown_it/rules_block/hr.py @@ -16,8 +16,7 @@ def hr(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool: pos = state.bMarks[startLine] + state.tShift[startLine] maximum = state.eMarks[startLine] - # if it's indented more than 3 spaces, it should be a code block - if 
state.sCount[startLine] - state.blkIndent >= 4: + if state.is_code_block(startLine): return False try: diff --git a/markdown_it/rules_block/html_block.py b/markdown_it/rules_block/html_block.py index 4831f562..dc3cadb1 100644 --- a/markdown_it/rules_block/html_block.py +++ b/markdown_it/rules_block/html_block.py @@ -38,8 +38,7 @@ def html_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> pos = state.bMarks[startLine] + state.tShift[startLine] maximum = state.eMarks[startLine] - # if it's indented more than 3 spaces, it should be a code block - if state.sCount[startLine] - state.blkIndent >= 4: + if state.is_code_block(startLine): return False if not state.md.options.get("html", None): diff --git a/markdown_it/rules_block/lheading.py b/markdown_it/rules_block/lheading.py index a3806f8e..beb56698 100644 --- a/markdown_it/rules_block/lheading.py +++ b/markdown_it/rules_block/lheading.py @@ -15,8 +15,7 @@ def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> b ruler: Ruler = state.md.block.ruler terminatorRules = ruler.getRules("paragraph") - # if it's indented more than 3 spaces, it should be a code block - if state.sCount[startLine] - state.blkIndent >= 4: + if state.is_code_block(startLine): return False oldParentType = state.parentType diff --git a/markdown_it/rules_block/list.py b/markdown_it/rules_block/list.py index 1592b599..eaaccda5 100644 --- a/markdown_it/rules_block/list.py +++ b/markdown_it/rules_block/list.py @@ -102,8 +102,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> isTerminatingParagraph = False tight = True - # if it's indented more than 3 spaces, it should be a code block - if state.sCount[startLine] - state.blkIndent >= 4: + if state.is_code_block(startLine): return False # Special case: @@ -295,8 +294,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> if state.sCount[nextLine] < state.blkIndent: break - # if it's indented more 
than 3 spaces, it should be a code block - if state.sCount[startLine] - state.blkIndent >= 4: + if state.is_code_block(startLine): break # fail if terminating block found diff --git a/markdown_it/rules_block/reference.py b/markdown_it/rules_block/reference.py index 5689064b..48f12721 100644 --- a/markdown_it/rules_block/reference.py +++ b/markdown_it/rules_block/reference.py @@ -16,8 +16,7 @@ def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) -> maximum = state.eMarks[startLine] nextLine = startLine + 1 - # if it's indented more than 3 spaces, it should be a code block - if state.sCount[startLine] - state.blkIndent >= 4: + if state.is_code_block(startLine): return False if state.srcCharCode[pos] != 0x5B: # /* [ */ diff --git a/markdown_it/rules_block/state_block.py b/markdown_it/rules_block/state_block.py index 7ddf806c..02f8dc9c 100644 --- a/markdown_it/rules_block/state_block.py +++ b/markdown_it/rules_block/state_block.py @@ -116,6 +116,9 @@ def __init__( self.lineMax = len(self.bMarks) - 1 # don't count last fake line + # pre-check if code blocks are enabled, to speed up is_code_block method + self._code_enabled = "code" in self.md["block"].ruler.get_active_rules() + def __repr__(self) -> str: return ( f"{self.__class__.__name__}" @@ -228,3 +231,9 @@ def getLines(self, begin: int, end: int, indent: int, keepLastLF: bool) -> str: i += 1 return "".join(queue) + + def is_code_block(self, line: int) -> bool: + """Check if line is a code block, + i.e. the code block rule is enabled and text is indented by more than 3 spaces. 
+ """ + return self._code_enabled and (self.sCount[line] - self.blkIndent) >= 4 diff --git a/markdown_it/rules_block/table.py b/markdown_it/rules_block/table.py index c432d44f..8f7be7f1 100644 --- a/markdown_it/rules_block/table.py +++ b/markdown_it/rules_block/table.py @@ -61,8 +61,7 @@ def table(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool if state.sCount[nextLine] < state.blkIndent: return False - # if it's indented more than 3 spaces, it should be a code block - if state.sCount[nextLine] - state.blkIndent >= 4: + if state.is_code_block(nextLine): return False # first character of the second line should be '|', '-', ':', @@ -126,7 +125,7 @@ def table(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool lineText = getLine(state, startLine).strip() if "|" not in lineText: return False - if state.sCount[startLine] - state.blkIndent >= 4: + if state.is_code_block(startLine): return False columns = escapedSplit(lineText) if columns and columns[0] == "": @@ -192,7 +191,7 @@ def table(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool lineText = getLine(state, nextLine).strip() if not lineText: break - if state.sCount[nextLine] - state.blkIndent >= 4: + if state.is_code_block(nextLine): break columns = escapedSplit(lineText) if columns and columns[0] == "": diff --git a/tests/test_port/fixtures/disable_code_block.md b/tests/test_port/fixtures/disable_code_block.md new file mode 100644 index 00000000..35cf925c --- /dev/null +++ b/tests/test_port/fixtures/disable_code_block.md @@ -0,0 +1,69 @@ +indent paragraph +. + This is a paragraph, + with multiple lines. + + This paragraph +has variable indents, + like this. +. +

This is a paragraph, +with multiple lines.

+

This paragraph +has variable indents, +like this.

+. + +indent in HTML +. +
+ + Paragraph + +
+. +
+

Paragraph

+
+. + +indent fence +. + ```python + def foo(): + pass + ``` +. +
def foo():
+    pass
+
+. + +indent heading +. + # Heading +. +

Heading

+. + +indent table +. + | foo | bar | + | --- | --- | + | baz | bim | +. + + + + + + + + + + + + + +
foobar
bazbim
+. diff --git a/tests/test_port/test_fixtures.py b/tests/test_port/test_fixtures.py index d2199caf..74c7ee4d 100644 --- a/tests/test_port/test_fixtures.py +++ b/tests/test_port/test_fixtures.py @@ -104,6 +104,17 @@ def test_strikethrough(line, title, input, expected): assert text.rstrip() == expected.rstrip() +@pytest.mark.parametrize( + "line,title,input,expected", + read_fixture_file(FIXTURE_PATH.joinpath("disable_code_block.md")), +) +def test_disable_code_block(line, title, input, expected): + md = MarkdownIt().enable("table").disable("code") + text = md.render(input) + print(text.rstrip()) + assert text.rstrip() == expected.rstrip() + + @pytest.mark.parametrize( "line,title,input,expected", read_fixture_file(FIXTURE_PATH.joinpath("issue-fixes.md")), From 798b9d02bcc80b31773f68c05e5a1a82617d2798 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Wed, 31 May 2023 22:03:04 +0200 Subject: [PATCH 12/28] =?UTF-8?q?=F0=9F=94=A7=20Move=20linting=20from=20fl?= =?UTF-8?q?ake8=20to=20ruff=20(#268)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 9 ++--- docs/conf.py | 2 +- markdown_it/common/normalize_url.py | 47 +++++++++++------------ markdown_it/main.py | 10 ++--- markdown_it/presets/__init__.py | 2 +- markdown_it/renderer.py | 25 ++++++------ markdown_it/ruler.py | 2 +- markdown_it/rules_block/fence.py | 5 +-- markdown_it/rules_block/list.py | 34 ++++++++-------- markdown_it/rules_block/reference.py | 23 ++++++----- markdown_it/rules_block/state_block.py | 9 +++-- markdown_it/rules_core/replacements.py | 47 ++++++++++++----------- markdown_it/rules_core/smartquotes.py | 16 ++++---- markdown_it/rules_inline/balance_pairs.py | 10 +++-- markdown_it/rules_inline/entity.py | 11 +++--- markdown_it/rules_inline/state_inline.py | 21 ++++------ markdown_it/token.py | 2 +- pyproject.toml | 5 +++ tests/test_cli.py | 5 +-- tox.ini | 4 -- 20 files changed, 141 insertions(+), 148 deletions(-) diff --git 
a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 49f45ed2..2aecdc6d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -33,14 +33,13 @@ repos: hooks: - id: black - - repo: https://github.com/PyCQA/flake8 - rev: 6.0.0 + - repo: https://github.com/charliermarsh/ruff-pre-commit + rev: v0.0.270 hooks: - - id: flake8 - additional_dependencies: [flake8-bugbear~=22.7] + - id: ruff - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.2.0 + rev: v1.3.0 hooks: - id: mypy additional_dependencies: [mdurl] diff --git a/docs/conf.py b/docs/conf.py index e0a6e621..6a6ee557 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -121,7 +121,7 @@ def run_apidoc(app): shutil.rmtree(api_folder) os.mkdir(api_folder) - argv = ["-M", "--separate", "-o", api_folder, module_path] + ignore_paths + argv = ["-M", "--separate", "-o", api_folder, module_path, *ignore_paths] apidoc.OPTIONS.append("ignore-module-all") apidoc.main(argv) diff --git a/markdown_it/common/normalize_url.py b/markdown_it/common/normalize_url.py index a4ebbaae..92720b31 100644 --- a/markdown_it/common/normalize_url.py +++ b/markdown_it/common/normalize_url.py @@ -1,6 +1,7 @@ from __future__ import annotations from collections.abc import Callable +from contextlib import suppress import re from urllib.parse import quote, unquote, urlparse, urlunparse # noqa: F401 @@ -21,18 +22,17 @@ def normalizeLink(url: str) -> str: """ parsed = mdurl.parse(url, slashes_denote_host=True) - if parsed.hostname: - # Encode hostnames in urls like: - # `http://host/`, `https://host/`, `mailto:user@host`, `//host/` - # - # We don't encode unknown schemas, because it's likely that we encode - # something we shouldn't (e.g. 
`skype:name` treated as `skype:host`) - # - if not parsed.protocol or parsed.protocol in RECODE_HOSTNAME_FOR: - try: - parsed = parsed._replace(hostname=_punycode.to_ascii(parsed.hostname)) - except Exception: - pass + # Encode hostnames in urls like: + # `http://host/`, `https://host/`, `mailto:user@host`, `//host/` + # + # We don't encode unknown schemas, because it's likely that we encode + # something we shouldn't (e.g. `skype:name` treated as `skype:host`) + # + if parsed.hostname and ( + not parsed.protocol or parsed.protocol in RECODE_HOSTNAME_FOR + ): + with suppress(Exception): + parsed = parsed._replace(hostname=_punycode.to_ascii(parsed.hostname)) return mdurl.encode(mdurl.format(parsed)) @@ -47,18 +47,17 @@ def normalizeLinkText(url: str) -> str: """ parsed = mdurl.parse(url, slashes_denote_host=True) - if parsed.hostname: - # Encode hostnames in urls like: - # `http://host/`, `https://host/`, `mailto:user@host`, `//host/` - # - # We don't encode unknown schemas, because it's likely that we encode - # something we shouldn't (e.g. `skype:name` treated as `skype:host`) - # - if not parsed.protocol or parsed.protocol in RECODE_HOSTNAME_FOR: - try: - parsed = parsed._replace(hostname=_punycode.to_unicode(parsed.hostname)) - except Exception: - pass + # Encode hostnames in urls like: + # `http://host/`, `https://host/`, `mailto:user@host`, `//host/` + # + # We don't encode unknown schemas, because it's likely that we encode + # something we shouldn't (e.g. 
`skype:name` treated as `skype:host`) + # + if parsed.hostname and ( + not parsed.protocol or parsed.protocol in RECODE_HOSTNAME_FOR + ): + with suppress(Exception): + parsed = parsed._replace(hostname=_punycode.to_unicode(parsed.hostname)) # add '%' to exclude list because of https://github.com/markdown-it/markdown-it/issues/720 return mdurl.decode(mdurl.format(parsed), mdurl.DECODE_DEFAULT_CHARS + "%") diff --git a/markdown_it/main.py b/markdown_it/main.py index acf8d079..243e1509 100644 --- a/markdown_it/main.py +++ b/markdown_it/main.py @@ -4,11 +4,11 @@ from contextlib import contextmanager from typing import Any, Literal, overload -from . import helpers, presets # noqa F401 -from .common import normalize_url, utils # noqa F401 -from .parser_block import ParserBlock # noqa F401 -from .parser_core import ParserCore # noqa F401 -from .parser_inline import ParserInline # noqa F401 +from . import helpers, presets +from .common import normalize_url, utils +from .parser_block import ParserBlock +from .parser_core import ParserCore +from .parser_inline import ParserInline from .renderer import RendererHTML, RendererProtocol from .rules_core.state_core import StateCore from .token import Token diff --git a/markdown_it/presets/__init__.py b/markdown_it/presets/__init__.py index 22cf74cb..f1cb0507 100644 --- a/markdown_it/presets/__init__.py +++ b/markdown_it/presets/__init__.py @@ -6,7 +6,7 @@ js_default = default -class gfm_like: +class gfm_like: # noqa: N801 """GitHub Flavoured Markdown (GFM) like. This adds the linkify, table and strikethrough components to CommmonMark. 
diff --git a/markdown_it/renderer.py b/markdown_it/renderer.py index 4cddbc67..7fee9ffa 100644 --- a/markdown_it/renderer.py +++ b/markdown_it/renderer.py @@ -152,19 +152,18 @@ def renderToken( if token.block: needLf = True - if token.nesting == 1: - if idx + 1 < len(tokens): - nextToken = tokens[idx + 1] - - if nextToken.type == "inline" or nextToken.hidden: - # Block-level tag containing an inline tag. - # - needLf = False - - elif nextToken.nesting == -1 and nextToken.tag == token.tag: - # Opening tag + closing tag of the same type. E.g. `
  • `. - # - needLf = False + if token.nesting == 1 and (idx + 1 < len(tokens)): + nextToken = tokens[idx + 1] + + if nextToken.type == "inline" or nextToken.hidden: # noqa: SIM114 + # Block-level tag containing an inline tag. + # + needLf = False + + elif nextToken.nesting == -1 and nextToken.tag == token.tag: + # Opening tag + closing tag of the same type. E.g. `
  • `. + # + needLf = False result += ">\n" if needLf else ">" diff --git a/markdown_it/ruler.py b/markdown_it/ruler.py index 421666cc..8ae32beb 100644 --- a/markdown_it/ruler.py +++ b/markdown_it/ruler.py @@ -30,7 +30,7 @@ class Ruler class StateBase: - srcCharCode: tuple[int, ...] + srcCharCode: tuple[int, ...] # noqa: N815 def __init__(self, src: str, md: MarkdownIt, env: EnvType): self.src = src diff --git a/markdown_it/rules_block/fence.py b/markdown_it/rules_block/fence.py index b4b28979..2051b96b 100644 --- a/markdown_it/rules_block/fence.py +++ b/markdown_it/rules_block/fence.py @@ -38,9 +38,8 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool params = state.src[pos:maximum] # /* ` */ - if marker == 0x60: - if chr(marker) in params: - return False + if marker == 0x60 and chr(marker) in params: + return False # Since start is found, we can report success here in validation mode if silent: diff --git a/markdown_it/rules_block/list.py b/markdown_it/rules_block/list.py index eaaccda5..f1cb089e 100644 --- a/markdown_it/rules_block/list.py +++ b/markdown_it/rules_block/list.py @@ -120,14 +120,17 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> # limit conditions when list can interrupt # a paragraph (validation mode only) - if silent and state.parentType == "paragraph": - # Next list item should still terminate previous list item - # - # This code can fail if plugins use blkIndent as well as lists, - # but I hope the spec gets fixed long before that happens. - # - if state.tShift[startLine] >= state.blkIndent: - isTerminatingParagraph = True + # Next list item should still terminate previous list item + # + # This code can fail if plugins use blkIndent as well as lists, + # but I hope the spec gets fixed long before that happens. 
+ # + if ( + silent + and state.parentType == "paragraph" + and state.tShift[startLine] >= state.blkIndent + ): + isTerminatingParagraph = True # Detect list type and position after marker posAfterMarker = skipOrderedListMarker(state, startLine) @@ -149,9 +152,11 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> # If we're starting a new unordered list right after # a paragraph, first line should not be empty. - if isTerminatingParagraph: - if state.skipSpaces(posAfterMarker) >= state.eMarks[startLine]: - return False + if ( + isTerminatingParagraph + and state.skipSpaces(posAfterMarker) >= state.eMarks[startLine] + ): + return False # We should terminate list on style change. Remember first one to compare. markerCharCode = state.srcCharCode[posAfterMarker - 1] @@ -209,11 +214,8 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> contentStart = pos - if contentStart >= maximum: - # trimming space in "- \n 3" case, indent is 1 here - indentAfterMarker = 1 - else: - indentAfterMarker = offset - initial + # trimming space in "- \n 3" case, indent is 1 here + indentAfterMarker = 1 if contentStart >= maximum else offset - initial # If we have more than 4 spaces, the indent is 1 # (the rest is just indented code block) diff --git a/markdown_it/rules_block/reference.py b/markdown_it/rules_block/reference.py index 48f12721..92f0918c 100644 --- a/markdown_it/rules_block/reference.py +++ b/markdown_it/rules_block/reference.py @@ -153,18 +153,17 @@ def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) -> break pos += 1 - if pos < maximum and charCodeAt(string, pos) != 0x0A: - if title: - # garbage at the end of the line after title, - # but it could still be a valid reference if we roll back - title = "" - pos = destEndPos - lines = destEndLineNo - while pos < maximum: - ch = charCodeAt(string, pos) - if not isSpace(ch): - break - pos += 1 + if pos < maximum and charCodeAt(string, pos) 
!= 0x0A and title: + # garbage at the end of the line after title, + # but it could still be a valid reference if we roll back + title = "" + pos = destEndPos + lines = destEndLineNo + while pos < maximum: + ch = charCodeAt(string, pos) + if not isSpace(ch): + break + pos += 1 if pos < maximum and charCodeAt(string, pos) != 0x0A: # garbage at the end of the line diff --git a/markdown_it/rules_block/state_block.py b/markdown_it/rules_block/state_block.py index 02f8dc9c..ee77f097 100644 --- a/markdown_it/rules_block/state_block.py +++ b/markdown_it/rules_block/state_block.py @@ -202,10 +202,11 @@ def getLines(self, begin: int, end: int, indent: int, keepLastLF: bool) -> str: while line < end: lineIndent = 0 lineStart = first = self.bMarks[line] - if line + 1 < end or keepLastLF: - last = self.eMarks[line] + 1 - else: - last = self.eMarks[line] + last = ( + self.eMarks[line] + 1 + if line + 1 < end or keepLastLF + else self.eMarks[line] + ) while (first < last) and (lineIndent < indent): ch = self.srcCharCode[first] diff --git a/markdown_it/rules_core/replacements.py b/markdown_it/rules_core/replacements.py index e5d81c7a..0b6e86af 100644 --- a/markdown_it/rules_core/replacements.py +++ b/markdown_it/rules_core/replacements.py @@ -78,29 +78,30 @@ def replace_rare(inlineTokens: list[Token]) -> None: inside_autolink = 0 for token in inlineTokens: - if token.type == "text" and not inside_autolink: - if RARE_RE.search(token.content): - # +- -> ± - token.content = PLUS_MINUS_RE.sub("±", token.content) - - # .., ..., ....... -> … - token.content = ELLIPSIS_RE.sub("…", token.content) - - # but ?..... & !..... -> ?.. & !.. 
- token.content = ELLIPSIS_QUESTION_EXCLAMATION_RE.sub( - "\\1..", token.content - ) - token.content = QUESTION_EXCLAMATION_RE.sub("\\1\\1\\1", token.content) - - # ,, ,,, ,,,, -> , - token.content = COMMA_RE.sub(",", token.content) - - # em-dash - token.content = EM_DASH_RE.sub("\\1\u2014", token.content) - - # en-dash - token.content = EN_DASH_RE.sub("\\1\u2013", token.content) - token.content = EN_DASH_INDENT_RE.sub("\\1\u2013", token.content) + if ( + token.type == "text" + and (not inside_autolink) + and RARE_RE.search(token.content) + ): + # +- -> ± + token.content = PLUS_MINUS_RE.sub("±", token.content) + + # .., ..., ....... -> … + token.content = ELLIPSIS_RE.sub("…", token.content) + + # but ?..... & !..... -> ?.. & !.. + token.content = ELLIPSIS_QUESTION_EXCLAMATION_RE.sub("\\1..", token.content) + token.content = QUESTION_EXCLAMATION_RE.sub("\\1\\1\\1", token.content) + + # ,, ,,, ,,,, -> , + token.content = COMMA_RE.sub(",", token.content) + + # em-dash + token.content = EM_DASH_RE.sub("\\1\u2014", token.content) + + # en-dash + token.content = EN_DASH_RE.sub("\\1\u2013", token.content) + token.content = EN_DASH_INDENT_RE.sub("\\1\u2013", token.content) if token.type == "link_open" and token.info == "auto": inside_autolink -= 1 diff --git a/markdown_it/rules_core/smartquotes.py b/markdown_it/rules_core/smartquotes.py index b11a5739..b4284493 100644 --- a/markdown_it/rules_core/smartquotes.py +++ b/markdown_it/rules_core/smartquotes.py @@ -100,19 +100,17 @@ def process_inlines(tokens: list[Token], state: StateCore) -> None: isLastWhiteSpace = isWhiteSpace(lastChar) isNextWhiteSpace = isWhiteSpace(nextChar) - if isNextWhiteSpace: + if isNextWhiteSpace: # noqa: SIM114 + canOpen = False + elif isNextPunctChar and not (isLastWhiteSpace or isLastPunctChar): canOpen = False - elif isNextPunctChar: - if not (isLastWhiteSpace or isLastPunctChar): - canOpen = False - if isLastWhiteSpace: + if isLastWhiteSpace: # noqa: SIM114 + canClose = False + elif 
isLastPunctChar and not (isNextWhiteSpace or isNextPunctChar): canClose = False - elif isLastPunctChar: - if not (isNextWhiteSpace or isNextPunctChar): - canClose = False - if nextChar == 0x22 and t.group(0) == '"': # 0x22: " + if nextChar == 0x22 and t.group(0) == '"': # 0x22: " # noqa: SIM102 if lastChar >= 0x30 and lastChar <= 0x39: # 0x30: 0, 0x39: 9 # special case: 1"" - count first quote as an inch canClose = canOpen = False diff --git a/markdown_it/rules_inline/balance_pairs.py b/markdown_it/rules_inline/balance_pairs.py index ce0a0884..6125de71 100644 --- a/markdown_it/rules_inline/balance_pairs.py +++ b/markdown_it/rules_inline/balance_pairs.py @@ -60,10 +60,12 @@ def processDelimiters(state: StateInline, delimiters: list[Delimiter]) -> None: # closing delimiters must not be a multiple of 3 unless both lengths # are multiples of 3. # - if opener.close or closer.open: - if (opener.length + closer.length) % 3 == 0: - if opener.length % 3 != 0 or closer.length % 3 != 0: - isOddMatch = True + if ( + (opener.close or closer.open) + and ((opener.length + closer.length) % 3 == 0) + and (opener.length % 3 != 0 or closer.length % 3 != 0) + ): + isOddMatch = True if not isOddMatch: # If previous delimiter cannot be an opener, we can safely skip diff --git a/markdown_it/rules_inline/entity.py b/markdown_it/rules_inline/entity.py index 9c4c6a0e..1e5d0ea0 100644 --- a/markdown_it/rules_inline/entity.py +++ b/markdown_it/rules_inline/entity.py @@ -40,12 +40,11 @@ def entity(state: StateInline, silent: bool) -> bool: else: match = NAMED_RE.search(state.src[pos:]) - if match: - if match.group(1) in entities: - if not silent: - state.pending += entities[match.group(1)] - state.pos += len(match.group(0)) - return True + if match and match.group(1) in entities: + if not silent: + state.pending += entities[match.group(1)] + state.pos += len(match.group(0)) + return True if not silent: state.pending += "&" diff --git a/markdown_it/rules_inline/state_inline.py 
b/markdown_it/rules_inline/state_inline.py index 7c1cb1f3..12e1d934 100644 --- a/markdown_it/rules_inline/state_inline.py +++ b/markdown_it/rules_inline/state_inline.py @@ -131,8 +131,6 @@ def scanDelims(self, start: int, canSplitWord: bool) -> Scanned: """ pos = start - left_flanking = True - right_flanking = True maximum = self.posMax marker = self.srcCharCode[start] @@ -153,17 +151,14 @@ def scanDelims(self, start: int, canSplitWord: bool) -> Scanned: isLastWhiteSpace = isWhiteSpace(lastChar) isNextWhiteSpace = isWhiteSpace(nextChar) - if isNextWhiteSpace: - left_flanking = False - elif isNextPunctChar: - if not (isLastWhiteSpace or isLastPunctChar): - left_flanking = False - - if isLastWhiteSpace: - right_flanking = False - elif isLastPunctChar: - if not (isNextWhiteSpace or isNextPunctChar): - right_flanking = False + left_flanking = not ( + isNextWhiteSpace + or (isNextPunctChar and not (isLastWhiteSpace or isLastPunctChar)) + ) + right_flanking = not ( + isLastWhiteSpace + or (isLastPunctChar and not (isNextWhiteSpace or isNextPunctChar)) + ) if not canSplitWord: can_open = left_flanking and ((not right_flanking) or isLastPunctChar) diff --git a/markdown_it/token.py b/markdown_it/token.py index e3f6c9b9..90008b72 100644 --- a/markdown_it/token.py +++ b/markdown_it/token.py @@ -80,7 +80,7 @@ def __post_init__(self) -> None: self.attrs = convert_attrs(self.attrs) def attrIndex(self, name: str) -> int: - warnings.warn( + warnings.warn( # noqa: B028 "Token.attrIndex should not be used, since Token.attrs is a dictionary", UserWarning, ) diff --git a/pyproject.toml b/pyproject.toml index acf2a288..22b220c8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,6 +83,11 @@ exclude = [ profile = "black" force_sort_within_sections = true +[tool.ruff] +line-length = 100 +extend-select = ["B0", "C4", "ICN", "ISC", "N", "RUF", "SIM"] +extend-ignore = ["ISC003", "N802", "N803", "N806", "N816", "RUF003"] + [tool.mypy] show_error_codes = true warn_unused_ignores = true 
diff --git a/tests/test_cli.py b/tests/test_cli.py index c38e24fd..ed8d8205 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -37,7 +37,6 @@ def test_interactive(): def mock_input(prompt): raise KeyboardInterrupt - with patch("builtins.print") as patched: - with patch("builtins.input", mock_input): - parse.interactive() + with patch("builtins.print") as patched, patch("builtins.input", mock_input): + parse.interactive() patched.assert_called() diff --git a/tox.ini b/tox.ini index 251e18df..59ea5f9e 100644 --- a/tox.ini +++ b/tox.ini @@ -66,7 +66,3 @@ description = run fuzzer on testcase file deps = atheris commands_pre = python scripts/build_fuzzers.py {envdir}/oss-fuzz commands = python {envdir}/oss-fuzz/infra/helper.py reproduce markdown-it-py fuzz_markdown {posargs:testcase} - -[flake8] -max-line-length = 100 -extend-ignore = E203 From c6754a2fda48f19312a1e73cbb53e7a355f36165 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Wed, 31 May 2023 22:32:18 +0200 Subject: [PATCH 13/28] =?UTF-8?q?=F0=9F=93=9A=20DOCS:=20re-activate=20code?= =?UTF-8?q?=20cells=20(#269)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/_static/custom.css | 2 +- docs/conf.py | 15 +--------- docs/using.md | 66 ++++++++++++++++++++--------------------- pyproject.toml | 1 + 4 files changed, 36 insertions(+), 48 deletions(-) diff --git a/docs/_static/custom.css b/docs/_static/custom.css index 9a16010b..a6c44314 100644 --- a/docs/_static/custom.css +++ b/docs/_static/custom.css @@ -1,4 +1,4 @@ -.code-cell > .highlight > pre { +.cell_output > .output > .highlight > pre { border-left-color: green; border-left-width: medium; border-left-style: solid; diff --git a/docs/conf.py b/docs/conf.py index 6a6ee557..2b48df1e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -36,6 +36,7 @@ "myst_parser", "sphinx_copybutton", "sphinx_design", + "jupyter_sphinx", ] # List of patterns, relative to source directory, that match files and @@ -135,17 +136,3 @@ 
def setup(app): """Add functions to the Sphinx setup.""" if os.environ.get("SKIP_APIDOC", None) is None: app.connect("builder-inited", run_apidoc) - - from sphinx.directives.code import CodeBlock - - class CodeCell(CodeBlock): - """Custom code block directive.""" - - def run(self): - """Run the directive.""" - self.options["class"] = ["code-cell"] - return super().run() - - # note, these could be run by myst-nb, - # but currently this causes a circular dependency issue - app.add_directive("code-cell", CodeCell) diff --git a/docs/using.md b/docs/using.md index aa632574..e2cf7e7e 100644 --- a/docs/using.md +++ b/docs/using.md @@ -27,17 +27,17 @@ then these are converted to other formats using 'renderers'. The simplest way to understand how text will be parsed is using: -```{code-cell} python +```{jupyter-execute} from pprint import pprint from markdown_it import MarkdownIt ``` -```{code-cell} python +```{jupyter-execute} md = MarkdownIt() md.render("some *text*") ``` -```{code-cell} python +```{jupyter-execute} for token in md.parse("some *text*"): print(token) print() @@ -59,24 +59,24 @@ You can define this configuration *via* directly supplying a dictionary or a pre Compared to `commonmark`, it enables the table, strikethrough and linkify components. **Important**, to use this configuration you must have `linkify-it-py` installed. -```{code-cell} python +```{jupyter-execute} from markdown_it.presets import zero zero.make() ``` -```{code-cell} python +```{jupyter-execute} md = MarkdownIt("zero") md.options ``` You can also override specific options: -```{code-cell} python +```{jupyter-execute} md = MarkdownIt("zero", {"maxNesting": 99}) md.options ``` -```{code-cell} python +```{jupyter-execute} pprint(md.get_active_rules()) ``` @@ -84,23 +84,23 @@ You can find all the parsing rules in the source code: `parser_core.py`, `parser_block.py`, `parser_inline.py`. 
-```{code-cell} python +```{jupyter-execute} pprint(md.get_all_rules()) ``` Any of the parsing rules can be enabled/disabled, and these methods are "chainable": -```{code-cell} python +```{jupyter-execute} md.render("- __*emphasise this*__") ``` -```{code-cell} python +```{jupyter-execute} md.enable(["list", "emphasis"]).render("- __*emphasise this*__") ``` You can temporarily modify rules with the `reset_rules` context manager. -```{code-cell} python +```{jupyter-execute} with md.reset_rules(): md.disable("emphasis") print(md.render("__*emphasise this*__")) @@ -109,7 +109,7 @@ md.render("__*emphasise this*__") Additionally `renderInline` runs the parser with all block syntax rules disabled. -```{code-cell} python +```{jupyter-execute} md.renderInline("__*emphasise this*__") ``` @@ -140,7 +140,7 @@ The `smartquotes` and `replacements` components are intended to improve typograp Both of these components require typography to be turned on, as well as the components enabled: -```{code-cell} python +```{jupyter-execute} md = MarkdownIt("commonmark", {"typographer": True}) md.enable(["replacements", "smartquotes"]) md.render("'single quotes' (c)") @@ -151,7 +151,7 @@ md.render("'single quotes' (c)") The `linkify` component requires that [linkify-it-py](https://github.com/tsutsu3/linkify-it-py) be installed (e.g. *via* `pip install markdown-it-py[linkify]`). This allows URI autolinks to be identified, without the need for enclosing in `<>` brackets: -```{code-cell} python +```{jupyter-execute} md = MarkdownIt("commonmark", {"linkify": True}) md.enable(["linkify"]) md.render("github.com") @@ -163,7 +163,7 @@ Plugins load collections of additional syntax rules and render methods into the A number of useful plugins are available in [`mdit_py_plugins`](https://github.com/executablebooks/mdit-py-plugins) (see [the plugin list](./plugins.md)), or you can create your own (following the [markdown-it design principles](./architecture.md)). 
-```{code-cell} python +```{jupyter-execute} from markdown_it import MarkdownIt import mdit_py_plugins from mdit_py_plugins.front_matter import front_matter_plugin @@ -175,7 +175,7 @@ md = ( .use(footnote_plugin) .enable('table') ) -text = (""" +text = ("""\ --- a: 1 --- @@ -188,7 +188,7 @@ A footnote [^1] [^1]: some details """) -md.render(text) +print(md.render(text)) ``` ## The Token Stream @@ -197,7 +197,7 @@ md.render(text) Before rendering, the text is parsed to a flat token stream of block level syntax elements, with nesting defined by opening (1) and closing (-1) attributes: -```{code-cell} python +```{jupyter-execute} md = MarkdownIt("commonmark") tokens = md.parse(""" Here's some *text* @@ -211,17 +211,17 @@ Here's some *text* Naturally all openings should eventually be closed, such that: -```{code-cell} python +```{jupyter-execute} sum([t.nesting for t in tokens]) == 0 ``` All tokens are the same class, which can also be created outside the parser: -```{code-cell} python +```{jupyter-execute} tokens[0] ``` -```{code-cell} python +```{jupyter-execute} from markdown_it.token import Token token = Token("paragraph_open", "p", 1, block=True, map=[1, 2]) token == tokens[0] @@ -229,19 +229,19 @@ token == tokens[0] The `'inline'` type token contain the inline tokens as children: -```{code-cell} python +```{jupyter-execute} tokens[1] ``` You can serialize a token (and its children) to a JSONable dictionary using: -```{code-cell} python +```{jupyter-execute} print(tokens[1].as_dict()) ``` This dictionary can also be deserialized: -```{code-cell} python +```{jupyter-execute} Token.from_dict(tokens[1].as_dict()) ``` @@ -254,7 +254,7 @@ Token.from_dict(tokens[1].as_dict()) In some use cases it may be useful to convert the token stream into a syntax tree, with opening/closing tokens collapsed into a single token that contains children. 
-```{code-cell} python +```{jupyter-execute} from markdown_it.tree import SyntaxTreeNode md = MarkdownIt("commonmark") @@ -274,11 +274,11 @@ print(node.pretty(indent=2, show_text=True)) You can then use methods to traverse the tree -```{code-cell} python +```{jupyter-execute} node.children ``` -```{code-cell} python +```{jupyter-execute} print(node[0]) node[0].next_sibling ``` @@ -302,7 +302,7 @@ def function(renderer, tokens, idx, options, env): You can inject render methods into the instantiated render class. -```{code-cell} python +```{jupyter-execute} md = MarkdownIt("commonmark") def render_em_open(self, tokens, idx, options, env): @@ -319,7 +319,7 @@ Also `add_render_rule` method is specific to Python, rather than adding directly You can also subclass a render and add the method there: -```{code-cell} python +```{jupyter-execute} from markdown_it.renderer import RendererHTML class MyRenderer(RendererHTML): @@ -332,7 +332,7 @@ md.render("*a*") Plugins can support multiple render types, using the `__output__` attribute (this is currently a Python only feature). 
-```{code-cell} python +```{jupyter-execute} from markdown_it.renderer import RendererHTML class MyRenderer1(RendererHTML): @@ -358,7 +358,7 @@ print(md.render("*a*")) Here's a more concrete example; let's replace images with vimeo links to player's iframe: -```{code-cell} python +```{jupyter-execute} import re from markdown_it import MarkdownIt @@ -384,7 +384,7 @@ print(md.render("![](https://www.vimeo.com/123)")) Here is another example, how to add `target="_blank"` to all links: -```{code-cell} python +```{jupyter-execute} from markdown_it import MarkdownIt def render_blank_link(self, tokens, idx, options, env): @@ -402,7 +402,7 @@ print(md.render("[a]\n\n[a]: b")) You can also render a token stream directly to markdown via the `MDRenderer` class from [`mdformat`](https://github.com/executablebooks/mdformat): -```{code-cell} python +```python from markdown_it import MarkdownIt from mdformat.renderer import MDRenderer diff --git a/pyproject.toml b/pyproject.toml index 22b220c8..b0d64fb1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,6 +52,7 @@ rtd = [ "sphinx-copybutton", "sphinx-design", "sphinx_book_theme", + "jupyter_sphinx", ] testing = [ "coverage", From f52249e1c26c7e66c8504848f582fcd3de85ab3d Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Thu, 1 Jun 2023 03:45:46 +0200 Subject: [PATCH 14/28] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20REFACTOR:=20Replace?= =?UTF-8?q?=20character=20codes=20with=20strings=20(#270)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The use of `StateBase.srcCharCode` is deprecated (with backward-compatibility), and all core uses are replaced by `StateBase.src`. Conversion of source string characters to an integer representing the Unicode character is prevalent in the upstream JavaScript implementation, to improve performance. However, it is unnecessary in Python and leads to harder to read code and performance deprecations (during the conversion in the `StateBase` initialisation). 
`StateBase.srcCharCode` is no longer populated on initiation, but is left as an on-demand, cached property, to allow backward compatibility for plugins (deprecation warnings are emitted to identify where updates are required). `isStrSpace` is supplied as a replacement for `isSpace`, and similarly `StateBlock.skipCharsStr`/`StateBlock.skipCharsStrBack` replace `StateBlock.skipChars`/`StateBlock.skipCharsBack` Co-authored-by: Taneli Hukkinen <3275109+hukkin@users.noreply.github.com> --- markdown_it/common/utils.py | 34 ++++++++-- markdown_it/helpers/parse_link_destination.py | 2 +- markdown_it/helpers/parse_link_label.py | 6 +- markdown_it/main.py | 2 +- markdown_it/parser_block.py | 9 +-- markdown_it/port.yaml | 3 +- markdown_it/ruler.py | 16 ++++- markdown_it/rules_block/blockquote.py | 32 +++++----- markdown_it/rules_block/fence.py | 14 ++--- markdown_it/rules_block/heading.py | 20 +++--- markdown_it/rules_block/hr.py | 14 ++--- markdown_it/rules_block/html_block.py | 2 +- markdown_it/rules_block/lheading.py | 13 ++-- markdown_it/rules_block/list.py | 48 +++++++------- markdown_it/rules_block/reference.py | 6 +- markdown_it/rules_block/state_block.py | 59 +++++++++-------- markdown_it/rules_block/table.py | 33 +++++----- markdown_it/rules_core/block.py | 4 +- markdown_it/rules_core/smartquotes.py | 25 ++++---- markdown_it/rules_inline/autolink.py | 8 +-- markdown_it/rules_inline/backticks.py | 8 +-- markdown_it/rules_inline/emphasis.py | 11 ++-- markdown_it/rules_inline/entity.py | 6 +- markdown_it/rules_inline/escape.py | 52 +++++++++++---- markdown_it/rules_inline/html_inline.py | 11 +--- markdown_it/rules_inline/image.py | 30 +++++---- markdown_it/rules_inline/link.py | 22 +++---- markdown_it/rules_inline/newline.py | 11 ++-- markdown_it/rules_inline/state_inline.py | 16 ++--- markdown_it/rules_inline/strikethrough.py | 21 +++---- markdown_it/rules_inline/text.py | 63 +++++++++---------- scripts/profiler.py | 2 +- 32 files changed, 321 insertions(+), 282 
deletions(-) diff --git a/markdown_it/common/utils.py b/markdown_it/common/utils.py index ed862e74..4effc00f 100644 --- a/markdown_it/common/utils.py +++ b/markdown_it/common/utils.py @@ -4,12 +4,12 @@ import html import re -from typing import Any, Match, TypeVar +from typing import Match, TypeVar from .entities import entities -def charCodeAt(src: str, pos: int) -> Any: +def charCodeAt(src: str, pos: int) -> int | None: """ Returns the Unicode value of the character at the specified location. @@ -24,6 +24,21 @@ def charCodeAt(src: str, pos: int) -> Any: return None +def charStrAt(src: str, pos: int) -> str | None: + """ + Returns the Unicode value of the character at the specified location. + + @param - index The zero-based index of the desired character. + If there is no character at the specified index, NaN is returned. + + This was added for compatibility with python + """ + try: + return src[pos] + except IndexError: + return None + + _ItemTV = TypeVar("_ItemTV") @@ -96,7 +111,7 @@ def replaceEntityPattern(match: str, name: str) -> str: if name in entities: return entities[name] - if ord(name[0]) == 0x23 and DIGITAL_ENTITY_TEST_RE.search(name): + if name[0] == "#" and DIGITAL_ENTITY_TEST_RE.search(name): code = int(name[2:], 16) if name[1].lower() == "x" else int(name[1:], 10) if isValidEntityCode(code): return fromCodePoint(code) @@ -178,8 +193,14 @@ def escapeRE(string: str) -> str: # ////////////////////////////////////////////////////////////////////////////// -def isSpace(code: object) -> bool: - return code in {0x09, 0x20} +def isSpace(code: int | None) -> bool: + """Check if character code is a whitespace.""" + return code in (0x09, 0x20) + + +def isStrSpace(ch: str | None) -> bool: + """Check if character is a whitespace.""" + return ch in ("\t", " ") MD_WHITESPACE = { @@ -188,7 +209,7 @@ def isSpace(code: object) -> bool: 0x0B, # \v 0x0C, # \f 0x0D, # \r - 0x20, + 0x20, # space 0xA0, 0x1680, 0x202F, @@ -213,6 +234,7 @@ def isWhiteSpace(code: int) -> 
bool: # Currently without astral characters support. def isPunctChar(ch: str) -> bool: + """Check if character is a punctuation character.""" return UNICODE_PUNCT_RE.search(ch) is not None diff --git a/markdown_it/helpers/parse_link_destination.py b/markdown_it/helpers/parse_link_destination.py index d527ce0c..f42b2244 100644 --- a/markdown_it/helpers/parse_link_destination.py +++ b/markdown_it/helpers/parse_link_destination.py @@ -49,7 +49,7 @@ def parseLinkDestination(string: str, pos: int, maximum: int) -> _Result: while pos < maximum: code = charCodeAt(string, pos) - if code == 0x20: + if code is None or code == 0x20: break # ascii control characters diff --git a/markdown_it/helpers/parse_link_label.py b/markdown_it/helpers/parse_link_label.py index 6ce8daf8..01c653c5 100644 --- a/markdown_it/helpers/parse_link_label.py +++ b/markdown_it/helpers/parse_link_label.py @@ -17,8 +17,8 @@ def parseLinkLabel(state: StateInline, start: int, disableNested: bool = False) level = 1 while state.pos < state.posMax: - marker = state.srcCharCode[state.pos] - if marker == 0x5D: # /* ] */) + marker = state.src[state.pos] + if marker == "]": level -= 1 if level == 0: found = True @@ -26,7 +26,7 @@ def parseLinkLabel(state: StateInline, start: int, disableNested: bool = False) prevPos = state.pos state.md.inline.skipToken(state) - if marker == 0x5B: # /* [ */) + if marker == "[": if prevPos == state.pos - 1: # increase level if we find text `[`, # which is not a part of any token diff --git a/markdown_it/main.py b/markdown_it/main.py index 243e1509..bb294a99 100644 --- a/markdown_it/main.py +++ b/markdown_it/main.py @@ -46,7 +46,7 @@ def __init__( """ # add modules self.utils = utils - self.helpers: Any = helpers + self.helpers = helpers # initialise classes self.inline = ParserInline() diff --git a/markdown_it/parser_block.py b/markdown_it/parser_block.py index cd240a8a..86f08cf5 100644 --- a/markdown_it/parser_block.py +++ b/markdown_it/parser_block.py @@ -97,16 +97,11 @@ def 
tokenize( state.line = line def parse( - self, - src: str, - md: MarkdownIt, - env: EnvType, - outTokens: list[Token], - ords: tuple[int, ...] | None = None, + self, src: str, md: MarkdownIt, env: EnvType, outTokens: list[Token] ) -> list[Token] | None: """Process input string and push block tokens into `outTokens`.""" if not src: return None - state = StateBlock(src, md, env, outTokens, ords) + state = StateBlock(src, md, env, outTokens) self.tokenize(state, state.line, state.lineMax) return state.tokens diff --git a/markdown_it/port.yaml b/markdown_it/port.yaml index a6718fda..945a19f6 100644 --- a/markdown_it/port.yaml +++ b/markdown_it/port.yaml @@ -23,8 +23,7 @@ to manipulate `Token.attrs`, which have an identical signature to those upstream. - Use python version of `charCodeAt` - | - Reduce use of charCodeAt() by storing char codes in a srcCharCodes attribute for state - objects and sharing those whenever possible + Use `str` units instead of `int`s to represent Unicode codepoints. This provides a significant performance boost - | In markdown_it/rules_block/reference.py, diff --git a/markdown_it/ruler.py b/markdown_it/ruler.py index 8ae32beb..9849561d 100644 --- a/markdown_it/ruler.py +++ b/markdown_it/ruler.py @@ -20,6 +20,7 @@ class Ruler from collections.abc import Callable, Iterable from dataclasses import dataclass, field from typing import TYPE_CHECKING, TypedDict +import warnings from markdown_it._compat import DATACLASS_KWARGS @@ -30,8 +31,6 @@ class Ruler class StateBase: - srcCharCode: tuple[int, ...] # noqa: N815 - def __init__(self, src: str, md: MarkdownIt, env: EnvType): self.src = src self.env = env @@ -44,7 +43,18 @@ def src(self) -> str: @src.setter def src(self, value: str) -> None: self._src = value - self.srcCharCode = tuple(ord(c) for c in self.src) + self._srcCharCode: tuple[int, ...] | None = None + + @property + def srcCharCode(self) -> tuple[int, ...]: + warnings.warn( + "StateBase.srcCharCode is deprecated. 
Use StateBase.src instead.", + DeprecationWarning, + stacklevel=2, + ) + if self._srcCharCode is None: + self._srcCharCode = tuple(ord(c) for c in self._src) + return self._srcCharCode # The first positional arg is always a subtype of `StateBase`. Other diff --git a/markdown_it/rules_block/blockquote.py b/markdown_it/rules_block/blockquote.py index da57dfa5..0c9081b9 100644 --- a/markdown_it/rules_block/blockquote.py +++ b/markdown_it/rules_block/blockquote.py @@ -3,7 +3,7 @@ import logging -from ..common.utils import isSpace +from ..common.utils import isStrSpace from .state_block import StateBlock LOGGER = logging.getLogger(__name__) @@ -23,7 +23,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> # check the block quote marker try: - if state.srcCharCode[pos] != 0x3E: # /* > */ + if state.src[pos] != ">": return False except IndexError: return False @@ -38,12 +38,12 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> initial = offset = state.sCount[startLine] + 1 try: - second_char_code: int | None = state.srcCharCode[pos] + second_char: str | None = state.src[pos] except IndexError: - second_char_code = None + second_char = None # skip one optional space after '>' - if second_char_code == 0x20: # /* space */ + if second_char == " ": # ' > test ' # ^ -- position start of line here: pos += 1 @@ -51,7 +51,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> offset += 1 adjustTab = False spaceAfterMarker = True - elif second_char_code == 0x09: # /* tab */ + elif second_char == "\t": spaceAfterMarker = True if (state.bsCount[startLine] + offset) % 4 == 3: @@ -74,10 +74,10 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> state.bMarks[startLine] = pos while pos < max: - ch = state.srcCharCode[pos] + ch = state.src[pos] - if isSpace(ch): - if ch == 0x09: # / tab / + if isStrSpace(ch): + if ch == "\t": offset += ( 4 - (offset + 
state.bsCount[startLine] + (1 if adjustTab else 0)) % 4 @@ -147,7 +147,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> # Case 1: line is not inside the blockquote, and this line is empty. break - evaluatesTrue = state.srcCharCode[pos] == 0x3E and not isOutdented # /* > */ + evaluatesTrue = state.src[pos] == ">" and not isOutdented pos += 1 if evaluatesTrue: # This line is inside the blockquote. @@ -156,12 +156,12 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> initial = offset = state.sCount[nextLine] + 1 try: - next_char: int | None = state.srcCharCode[pos] + next_char: str | None = state.src[pos] except IndexError: next_char = None # skip one optional space after '>' - if next_char == 0x20: # /* space */ + if next_char == " ": # ' > test ' # ^ -- position start of line here: pos += 1 @@ -169,7 +169,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> offset += 1 adjustTab = False spaceAfterMarker = True - elif next_char == 0x09: # /* tab */ + elif next_char == "\t": spaceAfterMarker = True if (state.bsCount[nextLine] + offset) % 4 == 3: @@ -192,10 +192,10 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> state.bMarks[nextLine] = pos while pos < max: - ch = state.srcCharCode[pos] + ch = state.src[pos] - if isSpace(ch): - if ch == 0x09: + if isStrSpace(ch): + if ch == "\t": offset += ( 4 - ( diff --git a/markdown_it/rules_block/fence.py b/markdown_it/rules_block/fence.py index 2051b96b..263f1b8d 100644 --- a/markdown_it/rules_block/fence.py +++ b/markdown_it/rules_block/fence.py @@ -19,15 +19,14 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool if pos + 3 > maximum: return False - marker = state.srcCharCode[pos] + marker = state.src[pos] - # /* ~ */ /* ` */ - if marker != 0x7E and marker != 0x60: + if marker not in ("~", "`"): return False # scan marker length mem = pos - pos = 
state.skipChars(pos, marker) + pos = state.skipCharsStr(pos, marker) length = pos - mem @@ -37,8 +36,7 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool markup = state.src[mem:pos] params = state.src[pos:maximum] - # /* ` */ - if marker == 0x60 and chr(marker) in params: + if marker == "`" and marker in params: return False # Since start is found, we can report success here in validation mode @@ -65,7 +63,7 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool break try: - if state.srcCharCode[pos] != marker: + if state.src[pos] != marker: continue except IndexError: break @@ -73,7 +71,7 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool if state.is_code_block(nextLine): continue - pos = state.skipChars(pos, marker) + pos = state.skipCharsStr(pos, marker) # closing code fence must be at least as long as the opening one if pos - mem < length: diff --git a/markdown_it/rules_block/heading.py b/markdown_it/rules_block/heading.py index 90847f9d..850ffb50 100644 --- a/markdown_it/rules_block/heading.py +++ b/markdown_it/rules_block/heading.py @@ -3,7 +3,7 @@ import logging -from ..common.utils import isSpace +from ..common.utils import isStrSpace from .state_block import StateBlock LOGGER = logging.getLogger(__name__) @@ -18,29 +18,27 @@ def heading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bo if state.is_code_block(startLine): return False - ch: int | None = state.srcCharCode[pos] + ch: str | None = state.src[pos] - # /* # */ - if ch != 0x23 or pos >= maximum: + if ch != "#" or pos >= maximum: return False # count heading level level = 1 pos += 1 try: - ch = state.srcCharCode[pos] + ch = state.src[pos] except IndexError: ch = None - # /* # */ - while ch == 0x23 and pos < maximum and level <= 6: + while ch == "#" and pos < maximum and level <= 6: level += 1 pos += 1 try: - ch = state.srcCharCode[pos] + ch = state.src[pos] except IndexError: ch = None - 
if level > 6 or (pos < maximum and not isSpace(ch)): + if level > 6 or (pos < maximum and not isStrSpace(ch)): return False if silent: @@ -49,8 +47,8 @@ def heading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bo # Let's cut tails like ' ### ' from the end of string maximum = state.skipSpacesBack(maximum, pos) - tmp = state.skipCharsBack(maximum, 0x23, pos) # # - if tmp > pos and isSpace(state.srcCharCode[tmp - 1]): + tmp = state.skipCharsStrBack(maximum, "#", pos) + if tmp > pos and isStrSpace(state.src[tmp - 1]): maximum = tmp state.line = startLine + 1 diff --git a/markdown_it/rules_block/hr.py b/markdown_it/rules_block/hr.py index 6e6b907b..16df05f2 100644 --- a/markdown_it/rules_block/hr.py +++ b/markdown_it/rules_block/hr.py @@ -4,7 +4,7 @@ """ import logging -from ..common.utils import isSpace +from ..common.utils import isStrSpace from .state_block import StateBlock LOGGER = logging.getLogger(__name__) @@ -20,22 +20,22 @@ def hr(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool: return False try: - marker = state.srcCharCode[pos] + marker = state.src[pos] except IndexError: return False pos += 1 - # Check hr marker: /* * */ /* - */ /* _ */ - if marker != 0x2A and marker != 0x2D and marker != 0x5F: + # Check hr marker + if marker not in ("*", "-", "_"): return False # markers can be mixed with spaces, but there should be at least 3 of them cnt = 1 while pos < maximum: - ch = state.srcCharCode[pos] + ch = state.src[pos] pos += 1 - if ch != marker and not isSpace(ch): + if ch != marker and not isStrSpace(ch): return False if ch == marker: cnt += 1 @@ -50,6 +50,6 @@ def hr(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool: token = state.push("hr", "hr", 0) token.map = [startLine, state.line] - token.markup = chr(marker) * (cnt + 1) + token.markup = marker * (cnt + 1) return True diff --git a/markdown_it/rules_block/html_block.py b/markdown_it/rules_block/html_block.py index dc3cadb1..3d43f6ee 100644 
--- a/markdown_it/rules_block/html_block.py +++ b/markdown_it/rules_block/html_block.py @@ -44,7 +44,7 @@ def html_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> if not state.md.options.get("html", None): return False - if state.srcCharCode[pos] != 0x3C: # /* < */ + if state.src[pos] != "<": return False lineText = state.src[pos:maximum] diff --git a/markdown_it/rules_block/lheading.py b/markdown_it/rules_block/lheading.py index beb56698..fbd50699 100644 --- a/markdown_it/rules_block/lheading.py +++ b/markdown_it/rules_block/lheading.py @@ -35,16 +35,15 @@ def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> b maximum = state.eMarks[nextLine] if pos < maximum: - marker = state.srcCharCode[pos] + marker = state.src[pos] - # /* - */ /* = */ - if marker == 0x2D or marker == 0x3D: - pos = state.skipChars(pos, marker) + if marker in ("-", "="): + pos = state.skipCharsStr(pos, marker) pos = state.skipSpaces(pos) # /* = */ if pos >= maximum: - level = 1 if marker == 0x3D else 2 + level = 1 if marker == "=" else 2 break # quirk for blockquotes, this line should already be checked by that rule @@ -72,7 +71,7 @@ def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> b state.line = nextLine + 1 token = state.push("heading_open", "h" + str(level), 1) - token.markup = chr(marker) + token.markup = marker token.map = [startLine, state.line] token = state.push("inline", "", 0) @@ -81,7 +80,7 @@ def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> b token.children = [] token = state.push("heading_close", "h" + str(level), -1) - token.markup = chr(marker) + token.markup = marker state.parentType = oldParentType diff --git a/markdown_it/rules_block/list.py b/markdown_it/rules_block/list.py index f1cb089e..a5c596bb 100644 --- a/markdown_it/rules_block/list.py +++ b/markdown_it/rules_block/list.py @@ -1,7 +1,7 @@ # Lists import logging -from ..common.utils import isSpace +from 
..common.utils import isStrSpace from .state_block import StateBlock LOGGER = logging.getLogger(__name__) @@ -14,18 +14,18 @@ def skipBulletListMarker(state: StateBlock, startLine: int) -> int: maximum = state.eMarks[startLine] try: - marker = state.srcCharCode[pos] + marker = state.src[pos] except IndexError: return -1 pos += 1 - # Check bullet /* * */ /* - */ /* + */ - if marker != 0x2A and marker != 0x2D and marker != 0x2B: + + if marker not in ("*", "-", "+"): return -1 if pos < maximum: - ch = state.srcCharCode[pos] + ch = state.src[pos] - if not isSpace(ch): + if not isStrSpace(ch): # " -test " - is not a list item return -1 @@ -43,11 +43,12 @@ def skipOrderedListMarker(state: StateBlock, startLine: int) -> int: if pos + 1 >= maximum: return -1 - ch = state.srcCharCode[pos] + ch = state.src[pos] pos += 1 + ch_ord = ord(ch) # /* 0 */ /* 9 */ - if ch < 0x30 or ch > 0x39: + if ch_ord < 0x30 or ch_ord > 0x39: return -1 while True: @@ -55,11 +56,12 @@ def skipOrderedListMarker(state: StateBlock, startLine: int) -> int: if pos >= maximum: return -1 - ch = state.srcCharCode[pos] + ch = state.src[pos] pos += 1 # /* 0 */ /* 9 */ - if ch >= 0x30 and ch <= 0x39: + ch_ord = ord(ch) + if ch_ord >= 0x30 and ch_ord <= 0x39: # List marker should have no more than 9 digits # (prevents integer overflow in browsers) if pos - start >= 10: @@ -67,16 +69,16 @@ def skipOrderedListMarker(state: StateBlock, startLine: int) -> int: continue - # found valid marker: /* ) */ /* . */ - if ch == 0x29 or ch == 0x2E: + # found valid marker + if ch in (")", "."): break return -1 if pos < maximum: - ch = state.srcCharCode[pos] + ch = state.src[pos] - if not isSpace(ch): + if not isStrSpace(ch): # " 1.test " - is not a list item return -1 @@ -159,7 +161,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> return False # We should terminate list on style change. Remember first one to compare. 
- markerCharCode = state.srcCharCode[posAfterMarker - 1] + markerChar = state.src[posAfterMarker - 1] # For validation mode we can terminate immediately if silent: @@ -177,7 +179,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> token = state.push("bullet_list_open", "ul", 1) token.map = listLines = [startLine, 0] - token.markup = chr(markerCharCode) + token.markup = markerChar # # Iterate list items @@ -201,11 +203,11 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> ) while pos < maximum: - ch = state.srcCharCode[pos] + ch = state.src[pos] - if ch == 0x09: # \t + if ch == "\t": offset += 4 - (offset + state.bsCount[nextLine]) % 4 - elif ch == 0x20: # \s + elif ch == " ": offset += 1 else: break @@ -228,7 +230,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> # Run subparser & write tokens token = state.push("list_item_open", "li", 1) - token.markup = chr(markerCharCode) + token.markup = markerChar token.map = itemLines = [startLine, 0] if isOrdered: token.info = state.src[start : posAfterMarker - 1] @@ -280,7 +282,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> state.tight = oldTight token = state.push("list_item_close", "li", -1) - token.markup = chr(markerCharCode) + token.markup = markerChar nextLine = startLine = state.line itemLines[1] = nextLine @@ -320,7 +322,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> if posAfterMarker < 0: break - if markerCharCode != state.srcCharCode[posAfterMarker - 1]: + if markerChar != state.src[posAfterMarker - 1]: break # Finalize list @@ -329,7 +331,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> else: token = state.push("bullet_list_close", "ul", -1) - token.markup = chr(markerCharCode) + token.markup = markerChar listLines[1] = nextLine state.line = nextLine diff --git a/markdown_it/rules_block/reference.py 
b/markdown_it/rules_block/reference.py index 92f0918c..b77944b2 100644 --- a/markdown_it/rules_block/reference.py +++ b/markdown_it/rules_block/reference.py @@ -19,17 +19,17 @@ def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) -> if state.is_code_block(startLine): return False - if state.srcCharCode[pos] != 0x5B: # /* [ */ + if state.src[pos] != "[": return False # Simple check to quickly interrupt scan on [link](url) at the start of line. # Can be useful on practice: https:#github.com/markdown-it/markdown-it/issues/54 while pos < maximum: # /* ] */ /* \ */ /* : */ - if state.srcCharCode[pos] == 0x5D and state.srcCharCode[pos - 1] != 0x5C: + if state.src[pos] == "]" and state.src[pos - 1] != "\\": if pos + 1 == maximum: return False - if state.srcCharCode[pos + 1] != 0x3A: + if state.src[pos + 1] != ":": return False break pos += 1 diff --git a/markdown_it/rules_block/state_block.py b/markdown_it/rules_block/state_block.py index ee77f097..96a2f88f 100644 --- a/markdown_it/rules_block/state_block.py +++ b/markdown_it/rules_block/state_block.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING, Literal -from ..common.utils import isSpace +from ..common.utils import isStrSpace from ..ruler import StateBase from ..token import Token from ..utils import EnvType @@ -13,18 +13,9 @@ class StateBlock(StateBase): def __init__( - self, - src: str, - md: MarkdownIt, - env: EnvType, - tokens: list[Token], - srcCharCode: tuple[int, ...] 
| None = None, - ): - if srcCharCode is not None: - self._src = src - self.srcCharCode = srcCharCode - else: - self.src = src + self, src: str, md: MarkdownIt, env: EnvType, tokens: list[Token] + ) -> None: + self.src = src # link to parser instance self.md = md @@ -80,12 +71,12 @@ def __init__( start = pos = indent = offset = 0 length = len(self.src) - for pos, character in enumerate(self.srcCharCode): + for pos, character in enumerate(self.src): if not indent_found: - if isSpace(character): + if isStrSpace(character): indent += 1 - if character == 0x09: + if character == "\t": offset += 4 - offset % 4 else: offset += 1 @@ -93,8 +84,8 @@ def __init__( else: indent_found = True - if character == 0x0A or pos == length - 1: - if character != 0x0A: + if character == "\n" or pos == length - 1: + if character != "\n": pos += 1 self.bMarks.append(start) self.eMarks.append(pos) @@ -157,7 +148,7 @@ def skipEmptyLines(self, from_pos: int) -> int: def skipSpaces(self, pos: int) -> int: """Skip spaces from given position.""" while pos < len(self.src): - if not isSpace(self.srcCharCode[pos]): + if not isStrSpace(self.src[pos]): break pos += 1 return pos @@ -168,20 +159,28 @@ def skipSpacesBack(self, pos: int, minimum: int) -> int: return pos while pos > minimum: pos -= 1 - if not isSpace(self.srcCharCode[pos]): + if not isStrSpace(self.src[pos]): return pos + 1 return pos def skipChars(self, pos: int, code: int) -> int: - """Skip char codes from given position.""" + """Skip character code from given position.""" while pos < len(self.src): if self.srcCharCode[pos] != code: break pos += 1 return pos + def skipCharsStr(self, pos: int, ch: str) -> int: + """Skip character string from given position.""" + while pos < len(self.src): + if self.src[pos] != ch: + break + pos += 1 + return pos + def skipCharsBack(self, pos: int, code: int, minimum: int) -> int: - """Skip char codes reverse from given position - 1.""" + """Skip character code reverse from given position - 1.""" if pos <= 
minimum: return pos while pos > minimum: @@ -190,6 +189,16 @@ def skipCharsBack(self, pos: int, code: int, minimum: int) -> int: return pos + 1 return pos + def skipCharsStrBack(self, pos: int, ch: str, minimum: int) -> int: + """Skip character string reverse from given position - 1.""" + if pos <= minimum: + return pos + while pos > minimum: + pos -= 1 + if ch != self.src[pos]: + return pos + 1 + return pos + def getLines(self, begin: int, end: int, indent: int, keepLastLF: bool) -> str: """Cut lines range from source.""" line = begin @@ -209,9 +218,9 @@ def getLines(self, begin: int, end: int, indent: int, keepLastLF: bool) -> str: ) while (first < last) and (lineIndent < indent): - ch = self.srcCharCode[first] - if isSpace(ch): - if ch == 0x09: + ch = self.src[first] + if isStrSpace(ch): + if ch == "\t": lineIndent += 4 - (lineIndent + self.bsCount[line]) % 4 else: lineIndent += 1 diff --git a/markdown_it/rules_block/table.py b/markdown_it/rules_block/table.py index 8f7be7f1..4b666c1d 100644 --- a/markdown_it/rules_block/table.py +++ b/markdown_it/rules_block/table.py @@ -3,7 +3,7 @@ import re -from ..common.utils import charCodeAt, isSpace +from ..common.utils import charStrAt, isStrSpace from .state_block import StateBlock headerLineRe = re.compile(r"^:?-+:?$") @@ -25,10 +25,10 @@ def escapedSplit(string: str) -> list[str]: isEscaped = False lastPos = 0 current = "" - ch = charCodeAt(string, pos) + ch = charStrAt(string, pos) while pos < max: - if ch == 0x7C: # /* | */ + if ch == "|": if not isEscaped: # pipe separating cells, '|' result.append(current + string[lastPos:pos]) @@ -39,10 +39,10 @@ def escapedSplit(string: str) -> list[str]: current += string[lastPos : pos - 1] lastPos = pos - isEscaped = ch == 0x5C # /* \ */ + isEscaped = ch == "\\" pos += 1 - ch = charCodeAt(string, pos) + ch = charStrAt(string, pos) result.append(current + string[lastPos:]) @@ -71,29 +71,27 @@ def table(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool pos 
= state.bMarks[nextLine] + state.tShift[nextLine] if pos >= state.eMarks[nextLine]: return False - first_ch = state.srcCharCode[pos] + first_ch = state.src[pos] pos += 1 - if first_ch not in {0x7C, 0x2D, 0x3A}: # not in {"|", "-", ":"} + if first_ch not in ("|", "-", ":"): return False if pos >= state.eMarks[nextLine]: return False - second_ch = state.srcCharCode[pos] + second_ch = state.src[pos] pos += 1 - # not in {"|", "-", ":"} and not space - if second_ch not in {0x7C, 0x2D, 0x3A} and not isSpace(second_ch): + if second_ch not in ("|", "-", ":") and not isStrSpace(second_ch): return False # if first character is '-', then second character must not be a space # (due to parsing ambiguity with list) - if first_ch == 0x2D and isSpace(second_ch): + if first_ch == "-" and isStrSpace(second_ch): return False while pos < state.eMarks[nextLine]: - ch = state.srcCharCode[pos] + ch = state.src[pos] - # /* | */ /* - */ /* : */ - if ch not in {0x7C, 0x2D, 0x3A} and not isSpace(ch): + if ch not in ("|", "-", ":") and not isStrSpace(ch): return False pos += 1 @@ -114,10 +112,9 @@ def table(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool if not headerLineRe.search(t): return False - if charCodeAt(t, len(t) - 1) == 0x3A: # /* : */ - # /* : */ - aligns.append("center" if charCodeAt(t, 0) == 0x3A else "right") - elif charCodeAt(t, 0) == 0x3A: # /* : */ + if charStrAt(t, len(t) - 1) == ":": + aligns.append("center" if charStrAt(t, 0) == ":" else "right") + elif charStrAt(t, 0) == ":": aligns.append("left") else: aligns.append("") diff --git a/markdown_it/rules_core/block.py b/markdown_it/rules_core/block.py index dc756418..a6c3bb8d 100644 --- a/markdown_it/rules_core/block.py +++ b/markdown_it/rules_core/block.py @@ -10,6 +10,4 @@ def block(state: StateCore) -> None: token.children = [] state.tokens.append(token) else: - state.md.block.parse( - state.src, state.md, state.env, state.tokens, state.srcCharCode - ) + state.md.block.parse(state.src, state.md, 
state.env, state.tokens) diff --git a/markdown_it/rules_core/smartquotes.py b/markdown_it/rules_core/smartquotes.py index b4284493..c98fbd71 100644 --- a/markdown_it/rules_core/smartquotes.py +++ b/markdown_it/rules_core/smartquotes.py @@ -24,9 +24,7 @@ def replaceAt(string: str, index: int, ch: str) -> str: def process_inlines(tokens: list[Token], state: StateCore) -> None: stack: list[dict[str, Any]] = [] - for i in range(len(tokens)): - token = tokens[i] - + for i, token in enumerate(tokens): thisLevel = token.level j = 0 @@ -60,13 +58,12 @@ def process_inlines(tokens: list[Token], state: StateCore) -> None: # Find previous character, # default to space if it's the beginning of the line - lastChar = 0x20 + lastChar: None | int = 0x20 if t.start(0) + lastIndex - 1 >= 0: lastChar = charCodeAt(text, t.start(0) + lastIndex - 1) else: for j in range(i)[::-1]: - # lastChar defaults to 0x20 if tokens[j].type == "softbreak" or tokens[j].type == "hardbreak": break # should skip all tokens except 'text', 'html_inline' or 'code_inline' @@ -78,7 +75,7 @@ def process_inlines(tokens: list[Token], state: StateCore) -> None: # Find next character, # default to space if it's the end of the line - nextChar = 0x20 + nextChar: None | int = 0x20 if pos < maximum: nextChar = charCodeAt(text, pos) @@ -94,11 +91,15 @@ def process_inlines(tokens: list[Token], state: StateCore) -> None: nextChar = charCodeAt(tokens[j].content, 0) break - isLastPunctChar = isMdAsciiPunct(lastChar) or isPunctChar(chr(lastChar)) - isNextPunctChar = isMdAsciiPunct(nextChar) or isPunctChar(chr(nextChar)) + isLastPunctChar = lastChar is not None and ( + isMdAsciiPunct(lastChar) or isPunctChar(chr(lastChar)) + ) + isNextPunctChar = nextChar is not None and ( + isMdAsciiPunct(nextChar) or isPunctChar(chr(nextChar)) + ) - isLastWhiteSpace = isWhiteSpace(lastChar) - isNextWhiteSpace = isWhiteSpace(nextChar) + isLastWhiteSpace = lastChar is not None and isWhiteSpace(lastChar) + isNextWhiteSpace = nextChar is not 
None and isWhiteSpace(nextChar) if isNextWhiteSpace: # noqa: SIM114 canOpen = False @@ -111,7 +112,9 @@ def process_inlines(tokens: list[Token], state: StateCore) -> None: canClose = False if nextChar == 0x22 and t.group(0) == '"': # 0x22: " # noqa: SIM102 - if lastChar >= 0x30 and lastChar <= 0x39: # 0x30: 0, 0x39: 9 + if ( + lastChar is not None and lastChar >= 0x30 and lastChar <= 0x39 + ): # 0x30: 0, 0x39: 9 # special case: 1"" - count first quote as an inch canClose = canOpen = False diff --git a/markdown_it/rules_inline/autolink.py b/markdown_it/rules_inline/autolink.py index 11ac5905..295d963f 100644 --- a/markdown_it/rules_inline/autolink.py +++ b/markdown_it/rules_inline/autolink.py @@ -12,7 +12,7 @@ def autolink(state: StateInline, silent: bool) -> bool: pos = state.pos - if state.srcCharCode[pos] != 0x3C: # /* < */ + if state.src[pos] != "<": return False start = state.pos @@ -23,11 +23,11 @@ def autolink(state: StateInline, silent: bool) -> bool: if pos >= maximum: return False - ch = state.srcCharCode[pos] + ch = state.src[pos] - if ch == 0x3C: # /* < */ + if ch == "<": return False - if ch == 0x3E: # /* > */ + if ch == ">": break url = state.src[start + 1 : pos] diff --git a/markdown_it/rules_inline/backticks.py b/markdown_it/rules_inline/backticks.py index 5f1e0552..fc60d6b1 100644 --- a/markdown_it/rules_inline/backticks.py +++ b/markdown_it/rules_inline/backticks.py @@ -8,10 +8,8 @@ def backtick(state: StateInline, silent: bool) -> bool: pos = state.pos - ch = state.srcCharCode[pos] - # /* ` */ - if ch != 0x60: + if state.src[pos] != "`": return False start = pos @@ -19,7 +17,7 @@ def backtick(state: StateInline, silent: bool) -> bool: maximum = state.posMax # scan marker length - while pos < maximum and (state.srcCharCode[pos] == 0x60): # /* ` */ + while pos < maximum and (state.src[pos] == "`"): pos += 1 marker = state.src[start:pos] @@ -42,7 +40,7 @@ def backtick(state: StateInline, silent: bool) -> bool: matchEnd = matchStart + 1 # scan marker 
length - while matchEnd < maximum and (state.srcCharCode[matchEnd] == 0x60): # /* ` */ + while matchEnd < maximum and (state.src[matchEnd] == "`"): matchEnd += 1 closerLength = matchEnd - matchStart diff --git a/markdown_it/rules_inline/emphasis.py b/markdown_it/rules_inline/emphasis.py index d21b494c..56b94b6b 100644 --- a/markdown_it/rules_inline/emphasis.py +++ b/markdown_it/rules_inline/emphasis.py @@ -8,23 +8,22 @@ def tokenize(state: StateInline, silent: bool) -> bool: """Insert each marker as a separate text token, and add it to delimiter list""" start = state.pos - marker = state.srcCharCode[start] + marker = state.src[start] if silent: return False - # /* _ */ /* * */ - if marker != 0x5F and marker != 0x2A: + if marker not in ("_", "*"): return False - scanned = state.scanDelims(state.pos, marker == 0x2A) + scanned = state.scanDelims(state.pos, marker == "*") for i in range(scanned.length): token = state.push("text", "", 0) - token.content = chr(marker) + token.content = marker state.delimiters.append( Delimiter( - marker=marker, + marker=ord(marker), length=scanned.length, jump=i, token=len(state.tokens) - 1, diff --git a/markdown_it/rules_inline/entity.py b/markdown_it/rules_inline/entity.py index 1e5d0ea0..d3b5f6bb 100644 --- a/markdown_it/rules_inline/entity.py +++ b/markdown_it/rules_inline/entity.py @@ -13,13 +13,11 @@ def entity(state: StateInline, silent: bool) -> bool: pos = state.pos maximum = state.posMax - if state.srcCharCode[pos] != 0x26: # /* & */ + if state.src[pos] != "&": return False if (pos + 1) < maximum: - ch = state.srcCharCode[pos + 1] - - if ch == 0x23: # /* # */ + if state.src[pos + 1] == "#": match = DIGITAL_RE.search(state.src[pos:]) if match: if not silent: diff --git a/markdown_it/rules_inline/escape.py b/markdown_it/rules_inline/escape.py index 1767e01d..8694cec1 100644 --- a/markdown_it/rules_inline/escape.py +++ b/markdown_it/rules_inline/escape.py @@ -1,42 +1,72 @@ """ Process escaped chars and hardbreaks """ -from 
..common.utils import isSpace +from ..common.utils import isStrSpace from .state_inline import StateInline -ESCAPED = [0 for _ in range(256)] -for ch in "\\!\"#$%&'()*+,./:;<=>?@[]^_`{|}~-": - ESCAPED[ord(ch)] = 1 +_ESCAPED = { + "!", + '"', + "#", + "$", + "%", + "&", + "'", + "(", + ")", + "*", + "+", + ",", + "-", + ".", + "/", + ":", + ";", + "<", + "=", + ">", + "?", + "@", + "[", + "\\", + "]", + "^", + "_", + "`", + "{", + "|", + "}", + "~", +} def escape(state: StateInline, silent: bool) -> bool: pos = state.pos maximum = state.posMax - # /* \ */ - if state.srcCharCode[pos] != 0x5C: + if state.src[pos] != "\\": return False pos += 1 if pos < maximum: - ch = state.srcCharCode[pos] + ch = state.src[pos] - if ch < 256 and ESCAPED[ch] != 0: + if ch in _ESCAPED: if not silent: state.pending += state.src[pos] state.pos += 2 return True - if ch == 0x0A: + if ch == "\n": if not silent: state.push("hardbreak", "br", 0) pos += 1 # skip leading whitespaces from next line while pos < maximum: - ch = state.srcCharCode[pos] - if not isSpace(ch): + ch = state.src[pos] + if not isStrSpace(ch): break pos += 1 diff --git a/markdown_it/rules_inline/html_inline.py b/markdown_it/rules_inline/html_inline.py index 6a636684..3c8b5331 100644 --- a/markdown_it/rules_inline/html_inline.py +++ b/markdown_it/rules_inline/html_inline.py @@ -17,17 +17,12 @@ def html_inline(state: StateInline, silent: bool) -> bool: # Check start maximum = state.posMax - if state.srcCharCode[pos] != 0x3C or pos + 2 >= maximum: # /* < */ + if state.src[pos] != "<" or pos + 2 >= maximum: return False # Quick fail on second char - ch = state.srcCharCode[pos + 1] - if ( - ch != 0x21 - and ch != 0x3F # /* ! */ - and ch != 0x2F # /* ? 
*/ - and not isLetter(ch) # /* / */ - ): + ch = state.src[pos + 1] + if ch not in ("!", "?", "/") and not isLetter(ord(ch)): # /* / */ return False match = HTML_TAG_RE.search(state.src[pos:]) diff --git a/markdown_it/rules_inline/image.py b/markdown_it/rules_inline/image.py index 0cb14ffd..b4a32a9f 100644 --- a/markdown_it/rules_inline/image.py +++ b/markdown_it/rules_inline/image.py @@ -1,7 +1,7 @@ # Process ![image]( "title") from __future__ import annotations -from ..common.utils import isSpace, normalizeReference +from ..common.utils import isStrSpace, normalizeReference from ..token import Token from .state_inline import StateInline @@ -12,11 +12,10 @@ def image(state: StateInline, silent: bool) -> bool: oldPos = state.pos max = state.posMax - # /* ! */ - if state.srcCharCode[state.pos] != 0x21: + if state.src[state.pos] != "!": return False - # /* [ */ - if state.pos + 1 < state.posMax and state.srcCharCode[state.pos + 1] != 0x5B: + + if state.pos + 1 < state.posMax and state.src[state.pos + 1] != "[": return False labelStart = state.pos + 2 @@ -27,8 +26,8 @@ def image(state: StateInline, silent: bool) -> bool: return False pos = labelEnd + 1 - # /* ( */ - if pos < max and state.srcCharCode[pos] == 0x28: + + if pos < max and state.src[pos] == "(": # # Inline link # @@ -37,8 +36,8 @@ def image(state: StateInline, silent: bool) -> bool: # ^^ skipping these spaces pos += 1 while pos < max: - code = state.srcCharCode[pos] - if not isSpace(code) and code != 0x0A: + ch = state.src[pos] + if not isStrSpace(ch) and ch != "\n": break pos += 1 @@ -60,8 +59,8 @@ def image(state: StateInline, silent: bool) -> bool: # ^^ skipping these spaces start = pos while pos < max: - code = state.srcCharCode[pos] - if not isSpace(code) and code != 0x0A: + ch = state.src[pos] + if not isStrSpace(ch) and ch != "\n": break pos += 1 @@ -75,15 +74,14 @@ def image(state: StateInline, silent: bool) -> bool: # [link]( "title" ) # ^^ skipping these spaces while pos < max: - code = 
state.srcCharCode[pos] - if not isSpace(code) and code != 0x0A: + ch = state.src[pos] + if not isStrSpace(ch) and ch != "\n": break pos += 1 else: title = "" - # /* ) */ - if pos >= max or state.srcCharCode[pos] != 0x29: + if pos >= max or state.src[pos] != ")": state.pos = oldPos return False @@ -97,7 +95,7 @@ def image(state: StateInline, silent: bool) -> bool: return False # /* [ */ - if pos < max and state.srcCharCode[pos] == 0x5B: + if pos < max and state.src[pos] == "[": start = pos + 1 pos = state.md.helpers.parseLinkLabel(state, pos) if pos >= 0: diff --git a/markdown_it/rules_inline/link.py b/markdown_it/rules_inline/link.py index c4548ccd..18c0736c 100644 --- a/markdown_it/rules_inline/link.py +++ b/markdown_it/rules_inline/link.py @@ -1,6 +1,6 @@ # Process [link]( "stuff") -from ..common.utils import isSpace, normalizeReference +from ..common.utils import isStrSpace, normalizeReference from .state_inline import StateInline @@ -13,7 +13,7 @@ def link(state: StateInline, silent: bool) -> bool: start = state.pos parseReference = True - if state.srcCharCode[state.pos] != 0x5B: # /* [ */ + if state.src[state.pos] != "[": return False labelStart = state.pos + 1 @@ -25,7 +25,7 @@ def link(state: StateInline, silent: bool) -> bool: pos = labelEnd + 1 - if pos < maximum and state.srcCharCode[pos] == 0x28: # /* ( */ + if pos < maximum and state.src[pos] == "(": # # Inline link # @@ -37,8 +37,8 @@ def link(state: StateInline, silent: bool) -> bool: # ^^ skipping these spaces pos += 1 while pos < maximum: - code = state.srcCharCode[pos] - if not isSpace(code) and code != 0x0A: + ch = state.src[pos] + if not isStrSpace(ch) and ch != "\n": break pos += 1 @@ -60,8 +60,8 @@ def link(state: StateInline, silent: bool) -> bool: # ^^ skipping these spaces start = pos while pos < maximum: - code = state.srcCharCode[pos] - if not isSpace(code) and code != 0x0A: + ch = state.src[pos] + if not isStrSpace(ch) and ch != "\n": break pos += 1 @@ -75,12 +75,12 @@ def link(state: 
StateInline, silent: bool) -> bool: # [link]( "title" ) # ^^ skipping these spaces while pos < maximum: - code = state.srcCharCode[pos] - if not isSpace(code) and code != 0x0A: + ch = state.src[pos] + if not isStrSpace(ch) and ch != "\n": break pos += 1 - if pos >= maximum or state.srcCharCode[pos] != 0x29: # /* ) */ + if pos >= maximum or state.src[pos] != ")": # parsing a valid shortcut link failed, fallback to reference parseReference = True @@ -93,7 +93,7 @@ def link(state: StateInline, silent: bool) -> bool: if "references" not in state.env: return False - if pos < maximum and state.srcCharCode[pos] == 0x5B: # /* [ */ + if pos < maximum and state.src[pos] == "[": start = pos + 1 pos = state.md.helpers.parseLinkLabel(state, pos) if pos >= 0: diff --git a/markdown_it/rules_inline/newline.py b/markdown_it/rules_inline/newline.py index 4c440579..dede7251 100644 --- a/markdown_it/rules_inline/newline.py +++ b/markdown_it/rules_inline/newline.py @@ -1,7 +1,7 @@ # Proceess '\n' import re -from ..common.utils import charCodeAt, isSpace +from ..common.utils import charStrAt, isStrSpace from .state_inline import StateInline endSpace = re.compile(r" +$") @@ -10,8 +10,7 @@ def newline(state: StateInline, silent: bool) -> bool: pos = state.pos - # /* \n */ - if state.srcCharCode[pos] != 0x0A: + if state.src[pos] != "\n": return False pmax = len(state.pending) - 1 @@ -22,8 +21,8 @@ def newline(state: StateInline, silent: bool) -> bool: # Pending string is stored in concat mode, indexed lookups will cause # conversion to flat mode. 
if not silent: - if pmax >= 0 and charCodeAt(state.pending, pmax) == 0x20: - if pmax >= 1 and charCodeAt(state.pending, pmax - 1) == 0x20: + if pmax >= 0 and charStrAt(state.pending, pmax) == " ": + if pmax >= 1 and charStrAt(state.pending, pmax - 1) == " ": state.pending = endSpace.sub("", state.pending) state.push("hardbreak", "br", 0) else: @@ -36,7 +35,7 @@ def newline(state: StateInline, silent: bool) -> bool: pos += 1 # skip heading spaces for next line - while pos < maximum and isSpace(state.srcCharCode[pos]): + while pos < maximum and isStrSpace(state.src[pos]): pos += 1 state.pos = pos diff --git a/markdown_it/rules_inline/state_inline.py b/markdown_it/rules_inline/state_inline.py index 12e1d934..ef23f85d 100644 --- a/markdown_it/rules_inline/state_inline.py +++ b/markdown_it/rules_inline/state_inline.py @@ -132,24 +132,24 @@ def scanDelims(self, start: int, canSplitWord: bool) -> Scanned: """ pos = start maximum = self.posMax - marker = self.srcCharCode[start] + marker = self.src[start] # treat beginning of the line as a whitespace - lastChar = self.srcCharCode[start - 1] if start > 0 else 0x20 + lastChar = self.src[start - 1] if start > 0 else " " - while pos < maximum and self.srcCharCode[pos] == marker: + while pos < maximum and self.src[pos] == marker: pos += 1 count = pos - start # treat end of the line as a whitespace - nextChar = self.srcCharCode[pos] if pos < maximum else 0x20 + nextChar = self.src[pos] if pos < maximum else " " - isLastPunctChar = isMdAsciiPunct(lastChar) or isPunctChar(chr(lastChar)) - isNextPunctChar = isMdAsciiPunct(nextChar) or isPunctChar(chr(nextChar)) + isLastPunctChar = isMdAsciiPunct(ord(lastChar)) or isPunctChar(lastChar) + isNextPunctChar = isMdAsciiPunct(ord(nextChar)) or isPunctChar(nextChar) - isLastWhiteSpace = isWhiteSpace(lastChar) - isNextWhiteSpace = isWhiteSpace(nextChar) + isLastWhiteSpace = isWhiteSpace(ord(lastChar)) + isNextWhiteSpace = isWhiteSpace(ord(nextChar)) left_flanking = not ( isNextWhiteSpace 
diff --git a/markdown_it/rules_inline/strikethrough.py b/markdown_it/rules_inline/strikethrough.py index 8b080816..f671412c 100644 --- a/markdown_it/rules_inline/strikethrough.py +++ b/markdown_it/rules_inline/strikethrough.py @@ -7,17 +7,16 @@ def tokenize(state: StateInline, silent: bool) -> bool: """Insert each marker as a separate text token, and add it to delimiter list""" start = state.pos - marker = state.srcCharCode[start] + ch = state.src[start] if silent: return False - if marker != 0x7E: # /* ~ */ + if ch != "~": return False scanned = state.scanDelims(state.pos, True) length = scanned.length - ch = chr(marker) if length < 2: return False @@ -33,15 +32,13 @@ def tokenize(state: StateInline, silent: bool) -> bool: token.content = ch + ch state.delimiters.append( Delimiter( - **{ - "marker": marker, - "length": 0, # disable "rule of 3" length checks meant for emphasis - "jump": i // 2, # for `~~` 1 marker = 2 characters - "token": len(state.tokens) - 1, - "end": -1, - "open": scanned.can_open, - "close": scanned.can_close, - } + marker=ord(ch), + length=0, # disable "rule of 3" length checks meant for emphasis + jump=i // 2, # for `~~` 1 marker = 2 characters + token=len(state.tokens) - 1, + end=-1, + open=scanned.can_open, + close=scanned.can_close, ) ) diff --git a/markdown_it/rules_inline/text.py b/markdown_it/rules_inline/text.py index bdf55310..f306b2e4 100644 --- a/markdown_it/rules_inline/text.py +++ b/markdown_it/rules_inline/text.py @@ -1,50 +1,45 @@ # Skip text characters for text token, place those to pending buffer # and increment current pos -from typing import Any - from .state_inline import StateInline # Rule to skip pure text # '{}$%@~+=:' reserved for extensions -# !, ", #, $, %, &, ', (, ), *, +, ,, -, ., /, :, ;, <, =, >, ?, @, [, \, ], ^, _, `, {, |, }, or ~ - # !!!! 
Don't confuse with "Markdown ASCII Punctuation" chars # http://spec.commonmark.org/0.15/#ascii-punctuation-character -def isTerminatorChar(ch: int) -> bool: - return ch in { - 0x0A, # /* \n */: - 0x21, # /* ! */: - 0x23, # /* # */: - 0x24, # /* $ */: - 0x25, # /* % */: - 0x26, # /* & */: - 0x2A, # /* * */: - 0x2B, # /* + */: - 0x2D, # /* - */: - 0x3A, # /* : */: - 0x3C, # /* < */: - 0x3D, # /* = */: - 0x3E, # /* > */: - 0x40, # /* @ */: - 0x5B, # /* [ */: - 0x5C, # /* \ */: - 0x5D, # /* ] */: - 0x5E, # /* ^ */: - 0x5F, # /* _ */: - 0x60, # /* ` */: - 0x7B, # /* { */: - 0x7D, # /* } */: - 0x7E, # /* ~ */: - } - - -def text(state: StateInline, silent: bool, **args: Any) -> bool: +_TerminatorChars = { + "\n", + "!", + "#", + "$", + "%", + "&", + "*", + "+", + "-", + ":", + "<", + "=", + ">", + "@", + "[", + "\\", + "]", + "^", + "_", + "`", + "{", + "}", + "~", +} + + +def text(state: StateInline, silent: bool) -> bool: pos = state.pos posMax = state.posMax - while (pos < posMax) and not isTerminatorChar(state.srcCharCode[pos]): + while (pos < posMax) and state.src[pos] not in _TerminatorChars: pos += 1 if pos == state.pos: diff --git a/scripts/profiler.py b/scripts/profiler.py index 414a7727..a593baa1 100644 --- a/scripts/profiler.py +++ b/scripts/profiler.py @@ -9,7 +9,7 @@ from markdown_it import MarkdownIt commonmark_spec = ( - (Path(__file__).parent / "tests" / "test_cmark_spec" / "spec.md") + (Path(__file__).parent.parent / "tests" / "test_cmark_spec" / "spec.md") .read_bytes() .decode() ) From 36a428b280b326bf6bfd98b657be2e408d2a87ab Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Thu, 1 Jun 2023 05:03:53 +0200 Subject: [PATCH 15/28] =?UTF-8?q?=F0=9F=91=8C=20Improve=20performance=20of?= =?UTF-8?q?=20`skipSpaces`/`skipChars`=20(#271)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't compute src length on every iteration --- markdown_it/rules_block/state_block.py | 24 ++++++++++++++++++------ 1 file changed, 18 
insertions(+), 6 deletions(-) diff --git a/markdown_it/rules_block/state_block.py b/markdown_it/rules_block/state_block.py index 96a2f88f..445ad265 100644 --- a/markdown_it/rules_block/state_block.py +++ b/markdown_it/rules_block/state_block.py @@ -147,8 +147,12 @@ def skipEmptyLines(self, from_pos: int) -> int: def skipSpaces(self, pos: int) -> int: """Skip spaces from given position.""" - while pos < len(self.src): - if not isStrSpace(self.src[pos]): + while True: + try: + current = self.src[pos] + except IndexError: + break + if not isStrSpace(current): break pos += 1 return pos @@ -165,16 +169,24 @@ def skipSpacesBack(self, pos: int, minimum: int) -> int: def skipChars(self, pos: int, code: int) -> int: """Skip character code from given position.""" - while pos < len(self.src): - if self.srcCharCode[pos] != code: + while True: + try: + current = self.srcCharCode[pos] + except IndexError: + break + if current != code: break pos += 1 return pos def skipCharsStr(self, pos: int, ch: str) -> int: """Skip character string from given position.""" - while pos < len(self.src): - if self.src[pos] != ch: + while True: + try: + current = self.src[pos] + except IndexError: + break + if current != ch: break pos += 1 return pos From 4e6dfd5994bc765c3d4d5c308e10831c86a1452f Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 2 Jun 2023 09:19:23 +0200 Subject: [PATCH 16/28] =?UTF-8?q?=F0=9F=90=9B=20FIX:=20numeric=20character?= =?UTF-8?q?=20reference=20passing=20(#272)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix issue with incorrect determination of a numeric character reference, and subsequent failure to convert to an integer code. 
From https://github.com/google/oss-fuzz/tree/master/projects/markdown-it-py, fixes issue 55371 This also essentially fixes a bug in upstream, see https://github.com/markdown-it/markdown-it/issues/935 --- markdown_it/common/utils.py | 74 +++++++------------------ tests/test_fuzzer.py | 14 ++--- tests/test_port/fixtures/issue-fixes.md | 9 +++ 3 files changed, 36 insertions(+), 61 deletions(-) diff --git a/markdown_it/common/utils.py b/markdown_it/common/utils.py index 4effc00f..6bf9a36f 100644 --- a/markdown_it/common/utils.py +++ b/markdown_it/common/utils.py @@ -2,7 +2,6 @@ """ from __future__ import annotations -import html import re from typing import Match, TypeVar @@ -52,9 +51,6 @@ def arrayReplaceAt( return src[:pos] + newElements + src[pos + 1 :] -###################################################################### - - def isValidEntityCode(c: int) -> bool: # broken sequence if c >= 0xD800 and c <= 0xDFFF: @@ -89,47 +85,33 @@ def fromCodePoint(c: int) -> str: return chr(c) -UNESCAPE_MD_RE = re.compile(r'\\([!"#$%&\'()*+,\-.\/:;<=>?@[\\\]^_`{|}~])') +# UNESCAPE_MD_RE = re.compile(r'\\([!"#$%&\'()*+,\-.\/:;<=>?@[\\\]^_`{|}~])') # ENTITY_RE_g = re.compile(r'&([a-z#][a-z0-9]{1,31})', re.IGNORECASE) UNESCAPE_ALL_RE = re.compile( r'\\([!"#$%&\'()*+,\-.\/:;<=>?@[\\\]^_`{|}~])' + "|" + r"&([a-z#][a-z0-9]{1,31});", re.IGNORECASE, ) -DIGITAL_ENTITY_TEST_RE = re.compile(r"^#((?:x[a-f0-9]{1,8}|[0-9]{1,8}))", re.IGNORECASE) +DIGITAL_ENTITY_BASE10_RE = re.compile(r"#([0-9]{1,8})") +DIGITAL_ENTITY_BASE16_RE = re.compile(r"#x([a-f0-9]{1,8})", re.IGNORECASE) def replaceEntityPattern(match: str, name: str) -> str: - """Convert HTML entity patterns - - :: - - https://www.google.com -> https%3A//www.google.com - + """Convert HTML entity patterns, + see https://spec.commonmark.org/0.30/#entity-references """ - code = 0 - if name in entities: return entities[name] - if name[0] == "#" and DIGITAL_ENTITY_TEST_RE.search(name): - code = int(name[2:], 16) if name[1].lower() == "x" 
else int(name[1:], 10) - if isValidEntityCode(code): - return fromCodePoint(code) - - return match - - -# def replaceEntities(string): -# if (string.indexOf('&') < 0): -# return string -# return string.replace(ENTITY_RE, replaceEntityPattern) + code: None | int = None + if pat := DIGITAL_ENTITY_BASE10_RE.fullmatch(name): + code = int(pat.group(1), 10) + elif pat := DIGITAL_ENTITY_BASE16_RE.fullmatch(name): + code = int(pat.group(1), 16) + if code is not None and isValidEntityCode(code): + return fromCodePoint(code) -def unescapeMd(string: str) -> str: - raise NotImplementedError - # if "\\" in string: - # return string - # return string.replace(UNESCAPE_MD_RE, "$1") + return match def unescapeAll(string: str) -> str: @@ -154,30 +136,14 @@ def stripEscape(string: str) -> str: return ESCAPE_CHAR.sub(r"\1", string) -# ////////////////////////////////////////////////////////////////////////////// - -# TODO This section changed quite a lot, should re-check - -# UNESCAPE_HTML_RE = re.compile(r"\\&(?=(amp\;|lt\;|gt\;|quot\;))") -# ESCAPE_AND_HTML = re.compile(r"&(?!(amp\;|lt\;|gt\;|quot\;))") -# HTML_ESCAPE_REPLACE_RE = re.compile(r'[&<>"]') - - -# def escapeHtml(string: str): - -# if HTML_ESCAPE_REPLACE_RE.search(string): - -# string = UNESCAPE_HTML_RE.sub("&", string) -# string = ESCAPE_AND_HTML.sub("&", string) -# for k, v in {"<": "<", ">": ">", '"': """}.items(): -# string = string.replace(k, v) - -# return string - - def escapeHtml(raw: str) -> str: - # return html.escape(html.unescape(raw)).replace("'", "'") - return html.escape(raw).replace("'", "'") + """Replace special characters "&", "<", ">" and '"' to HTML-safe sequences.""" + # like html.escape, but without escaping single quotes + raw = raw.replace("&", "&") # Must be done first! 
+ raw = raw.replace("<", "<") + raw = raw.replace(">", ">") + raw = raw.replace('"', """) + return raw # ////////////////////////////////////////////////////////////////////////////// diff --git a/tests/test_fuzzer.py b/tests/test_fuzzer.py index 60cdddaa..f3666cc5 100644 --- a/tests/test_fuzzer.py +++ b/tests/test_fuzzer.py @@ -10,15 +10,15 @@ from markdown_it import MarkdownIt TESTS = { - 55363: ">```\n>", - 55367: ">-\n>\n>", - # 55371: "[](so»0;!" TODO this did not fail - # 55401: "?c_" * 100_000 TODO this did not fail + 55363: (">```\n>", "
    \n
    \n
    \n"), + 55367: (">-\n>\n>", "
    \n
      \n
    • \n
    \n
    \n"), + 55371: ("[](soH0;!", "

    [](so&#4H0;!

    \n"), + # 55401: (("?c_" * 100000) + "c_", ""), TODO this does not fail, just takes a long time } -@pytest.mark.parametrize("raw_input", TESTS.values(), ids=TESTS.keys()) -def test_fuzzing(raw_input): +@pytest.mark.parametrize("raw_input,expected", TESTS.values(), ids=TESTS.keys()) +def test_fuzzing(raw_input, expected): md = MarkdownIt() md.parse(raw_input) - print(md.render(raw_input)) + assert md.render(raw_input) == expected diff --git a/tests/test_port/fixtures/issue-fixes.md b/tests/test_port/fixtures/issue-fixes.md index 319945af..b630fcee 100644 --- a/tests/test_port/fixtures/issue-fixes.md +++ b/tests/test_port/fixtures/issue-fixes.md @@ -45,3 +45,12 @@ Fix CVE-2023-26303

    . + +Fix parsing of incorrect numeric character references +. +[]("y;) "y; +[](#y;) #y; +. +

    &#X22y; + &#35y;

    +. From eb96da144db03319bacdda9dc765d8cd66f50dd3 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 2 Jun 2023 13:34:19 +0200 Subject: [PATCH 17/28] =?UTF-8?q?=F0=9F=91=8C=20Improve=20nested=20emphasi?= =?UTF-8?q?s=20parsing=20(#273)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes quadratic complexity in e.g. `**<...>**a**<...>**` Implementation of upstream commit: https://github.com/markdown-it/markdown-it/commit/24abaa51a605c2de14da59893797733921f09bb8 --- markdown_it/port.yaml | 6 ++-- markdown_it/rules_inline/balance_pairs.py | 43 +++++++++++++++++------ markdown_it/rules_inline/emphasis.py | 7 ++-- markdown_it/rules_inline/state_inline.py | 8 ----- markdown_it/rules_inline/strikethrough.py | 1 - 5 files changed, 39 insertions(+), 26 deletions(-) diff --git a/markdown_it/port.yaml b/markdown_it/port.yaml index 945a19f6..679d13b7 100644 --- a/markdown_it/port.yaml +++ b/markdown_it/port.yaml @@ -1,7 +1,7 @@ - package: markdown-it/markdown-it - version: 12.2.0 - commit: 6e2de08a0b03d3d0dcc524b89710ce05f83a0283 - date: Aug 2, 2021 + version: 12.3.0 + commit: 2e31d3430187d2eee1ba120c954783eebb93b4e8 + date: Dec 9, 2021 notes: - Rename variables that use python built-in names, e.g. 
- `max` -> `maximum` diff --git a/markdown_it/rules_inline/balance_pairs.py b/markdown_it/rules_inline/balance_pairs.py index 6125de71..bbb2101c 100644 --- a/markdown_it/rules_inline/balance_pairs.py +++ b/markdown_it/rules_inline/balance_pairs.py @@ -6,13 +6,33 @@ def processDelimiters(state: StateInline, delimiters: list[Delimiter]) -> None: """For each opening emphasis-like marker find a matching closing one.""" + if not delimiters: + return + openersBottom = {} maximum = len(delimiters) + # headerIdx is the first delimiter of the current (where closer is) delimiter run + headerIdx = 0 + lastTokenIdx = -2 # needs any value lower than -1 + jumps: list[int] = [] closerIdx = 0 while closerIdx < maximum: closer = delimiters[closerIdx] + jumps.append(0) + + # markers belong to same delimiter run if: + # - they have adjacent tokens + # - AND markers are the same + # + if ( + delimiters[headerIdx].marker != closer.marker + or lastTokenIdx != closer.token - 1 + ): + headerIdx = closerIdx + lastTokenIdx = closer.token + # Length is only used for emphasis-specific "rule of 3", # if it's not defined (in strikethrough or 3rd party plugins), # we can default it to 0 to disable those checks. @@ -34,12 +54,7 @@ def processDelimiters(state: StateInline, delimiters: list[Delimiter]) -> None: (3 if closer.open else 0) + (closer.length % 3) ] - openerIdx = closerIdx - closer.jump - 1 - - # avoid crash if `closer.jump` is pointing outside of the array, - # e.g. 
for strikethrough - if openerIdx < -1: - openerIdx = -1 + openerIdx = headerIdx - jumps[headerIdx] - 1 newMinOpenerIdx = openerIdx @@ -47,7 +62,7 @@ def processDelimiters(state: StateInline, delimiters: list[Delimiter]) -> None: opener = delimiters[openerIdx] if opener.marker != closer.marker: - openerIdx -= opener.jump + 1 + openerIdx -= jumps[openerIdx] + 1 continue if opener.open and opener.end < 0: @@ -73,19 +88,25 @@ def processDelimiters(state: StateInline, delimiters: list[Delimiter]) -> None: # sure algorithm has linear complexity (see *_*_*_*_*_... case). # if openerIdx > 0 and not delimiters[openerIdx - 1].open: - lastJump = delimiters[openerIdx - 1].jump + 1 + lastJump = jumps[openerIdx - 1] + 1 else: lastJump = 0 - closer.jump = closerIdx - openerIdx + lastJump + jumps[closerIdx] = closerIdx - openerIdx + lastJump + jumps[openerIdx] = lastJump + closer.open = False opener.end = closerIdx - opener.jump = lastJump opener.close = False newMinOpenerIdx = -1 + + # treat next token as start of run, + # it optimizes skips in **<...>**a**<...>** pathological case + lastTokenIdx = -2 + break - openerIdx -= opener.jump + 1 + openerIdx -= jumps[openerIdx] + 1 if newMinOpenerIdx != -1: # If match for this delimiter run failed, we want to set lower bound for diff --git a/markdown_it/rules_inline/emphasis.py b/markdown_it/rules_inline/emphasis.py index 56b94b6b..9a98f9e2 100644 --- a/markdown_it/rules_inline/emphasis.py +++ b/markdown_it/rules_inline/emphasis.py @@ -18,14 +18,13 @@ def tokenize(state: StateInline, silent: bool) -> bool: scanned = state.scanDelims(state.pos, marker == "*") - for i in range(scanned.length): + for _ in range(scanned.length): token = state.push("text", "", 0) token.content = marker state.delimiters.append( Delimiter( marker=ord(marker), length=scanned.length, - jump=i, token=len(state.tokens) - 1, end=-1, open=scanned.can_open, @@ -63,9 +62,11 @@ def _postProcess(state: StateInline, delimiters: list[Delimiter]) -> None: isStrong = ( i > 
0 and delimiters[i - 1].end == startDelim.end + 1 + # check that first two markers match and adjacent + and delimiters[i - 1].marker == startDelim.marker and delimiters[i - 1].token == startDelim.token - 1 + # check that last two markers are adjacent (we can safely assume they match) and delimiters[startDelim.end + 1].token == endDelim.token + 1 - and delimiters[i - 1].marker == startDelim.marker ) ch = chr(startDelim.marker) diff --git a/markdown_it/rules_inline/state_inline.py b/markdown_it/rules_inline/state_inline.py index ef23f85d..143ab33e 100644 --- a/markdown_it/rules_inline/state_inline.py +++ b/markdown_it/rules_inline/state_inline.py @@ -22,14 +22,6 @@ class Delimiter: # Total length of these series of delimiters. length: int - # An amount of characters before this one that's equivalent to - # current one. In plain English: if this delimiter does not open - # an emphasis, neither do previous `jump` characters. - # - # Used to skip sequences like "*****" in one step, for 1st asterisk - # value will be 0, for 2nd it's 1 and so on. - jump: int - # A position of the token this delimiter corresponds to. 
token: int diff --git a/markdown_it/rules_inline/strikethrough.py b/markdown_it/rules_inline/strikethrough.py index f671412c..ec816281 100644 --- a/markdown_it/rules_inline/strikethrough.py +++ b/markdown_it/rules_inline/strikethrough.py @@ -34,7 +34,6 @@ def tokenize(state: StateInline, silent: bool) -> bool: Delimiter( marker=ord(ch), length=0, # disable "rule of 3" length checks meant for emphasis - jump=i // 2, # for `~~` 1 marker = 2 characters token=len(state.tokens) - 1, end=-1, open=scanned.can_open, From 84dcefe28712c2488e4f1a1c1a03afe5222a70c0 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 2 Jun 2023 13:43:54 +0200 Subject: [PATCH 18/28] =?UTF-8?q?=F0=9F=90=9B=20Fix:=20tab=20preventing=20?= =?UTF-8?q?paragraph=20continuation=20in=20lists=20(#274)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements upstream: https://github.com/markdown-it/markdown-it/commit/1cd8a5143b22967a7583bba19678900efdf72adf --- markdown_it/port.yaml | 6 +++--- markdown_it/rules_block/list.py | 2 +- tests/test_port/fixtures/commonmark_extras.md | 21 +++++++++++++++++++ 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/markdown_it/port.yaml b/markdown_it/port.yaml index 679d13b7..c0a4d70b 100644 --- a/markdown_it/port.yaml +++ b/markdown_it/port.yaml @@ -1,7 +1,7 @@ - package: markdown-it/markdown-it - version: 12.3.0 - commit: 2e31d3430187d2eee1ba120c954783eebb93b4e8 - date: Dec 9, 2021 + version: 12.3.1 + commit: 76469e83dc1a1e3ed943b483b554003a666bddf7 + date: Jan 7, 2022 notes: - Rename variables that use python built-in names, e.g. 
- `max` -> `maximum` diff --git a/markdown_it/rules_block/list.py b/markdown_it/rules_block/list.py index a5c596bb..d8070d74 100644 --- a/markdown_it/rules_block/list.py +++ b/markdown_it/rules_block/list.py @@ -130,7 +130,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> if ( silent and state.parentType == "paragraph" - and state.tShift[startLine] >= state.blkIndent + and state.sCount[startLine] >= state.blkIndent ): isTerminatingParagraph = True diff --git a/tests/test_port/fixtures/commonmark_extras.md b/tests/test_port/fixtures/commonmark_extras.md index 168b039d..5d13d859 100644 --- a/tests/test_port/fixtures/commonmark_extras.md +++ b/tests/test_port/fixtures/commonmark_extras.md @@ -166,6 +166,27 @@ Regression test (code block + regular paragraph) . +Regression test (tabs in lists, #830) +. +1. asd + 2. asd + +--- + +1. asd + 2. asd +. +
      +
    1. asd +2. asd
    2. +
    +
    +
      +
    1. asd +2. asd
    2. +
    +. + Blockquotes inside indented lists should terminate correctly . - a From 500e69e6d0a49a6970cb277802772c0317bf9f2a Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 2 Jun 2023 14:03:52 +0200 Subject: [PATCH 19/28] =?UTF-8?q?=F0=9F=91=8C=20fix=20possible=20ReDOS=20i?= =?UTF-8?q?n=20newline=20rule=20(#275)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements upstream: https://github.com/markdown-it/markdown-it/commit/ffc49ab46b5b751cd2be0aabb146f2ef84986101 --- markdown_it/port.yaml | 6 +++--- markdown_it/rules_inline/newline.py | 13 +++++++------ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/markdown_it/port.yaml b/markdown_it/port.yaml index c0a4d70b..a553fe1a 100644 --- a/markdown_it/port.yaml +++ b/markdown_it/port.yaml @@ -1,7 +1,7 @@ - package: markdown-it/markdown-it - version: 12.3.1 - commit: 76469e83dc1a1e3ed943b483b554003a666bddf7 - date: Jan 7, 2022 + version: 12.3.2 + commit: d72c68b520cedacae7878caa92bf7fe32e3e0e6f + date: Jan 8, 2022 notes: - Rename variables that use python built-in names, e.g. - `max` -> `maximum` diff --git a/markdown_it/rules_inline/newline.py b/markdown_it/rules_inline/newline.py index dede7251..ca8f1db0 100644 --- a/markdown_it/rules_inline/newline.py +++ b/markdown_it/rules_inline/newline.py @@ -1,11 +1,7 @@ -# Proceess '\n' -import re - +"""Proceess '\n'.""" from ..common.utils import charStrAt, isStrSpace from .state_inline import StateInline -endSpace = re.compile(r" +$") - def newline(state: StateInline, silent: bool) -> bool: pos = state.pos @@ -23,7 +19,12 @@ def newline(state: StateInline, silent: bool) -> bool: if not silent: if pmax >= 0 and charStrAt(state.pending, pmax) == " ": if pmax >= 1 and charStrAt(state.pending, pmax - 1) == " ": - state.pending = endSpace.sub("", state.pending) + # Find whitespaces tail of pending chars. 
+ ws = pmax - 1 + while ws >= 1 and charStrAt(state.pending, ws - 1) == " ": + ws -= 1 + state.pending = state.pending[:ws] + state.push("hardbreak", "br", 0) else: state.pending = state.pending[:-1] From ba96f34dc14c0d8cd274f1c9d9e56f2187707710 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 2 Jun 2023 15:18:38 +0200 Subject: [PATCH 20/28] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Refactor=20backslash?= =?UTF-8?q?=20escape=20logic=20(#276)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- markdown_it/parser_core.py | 11 ++- markdown_it/parser_inline.py | 9 +- markdown_it/presets/commonmark.py | 4 +- markdown_it/presets/zero.py | 7 +- markdown_it/rules_core/__init__.py | 2 + markdown_it/rules_core/text_join.py | 34 +++++++ markdown_it/rules_inline/__init__.py | 4 +- markdown_it/rules_inline/escape.py | 91 +++++++++++-------- .../{text_collapse.py => fragments_join.py} | 2 +- tests/test_api/test_main.py | 35 +++---- tests/test_port/fixtures/linkify.md | 9 +- tests/test_port/fixtures/smartquotes.md | 15 ++- tests/test_port/fixtures/typographer.md | 17 ++++ 13 files changed, 173 insertions(+), 67 deletions(-) create mode 100644 markdown_it/rules_core/text_join.py rename markdown_it/rules_inline/{text_collapse.py => fragments_join.py} (96%) diff --git a/markdown_it/parser_core.py b/markdown_it/parser_core.py index 251b7634..b3eb8abe 100644 --- a/markdown_it/parser_core.py +++ b/markdown_it/parser_core.py @@ -7,7 +7,15 @@ from __future__ import annotations from .ruler import RuleFunc, Ruler -from .rules_core import block, inline, linkify, normalize, replace, smartquotes +from .rules_core import ( + block, + inline, + linkify, + normalize, + replace, + smartquotes, + text_join, +) from .rules_core.state_core import StateCore _rules: list[tuple[str, RuleFunc]] = [ @@ -17,6 +25,7 @@ ("linkify", linkify), ("replacements", replace), 
("smartquotes", smartquotes), + ("text_join", text_join), ] diff --git a/markdown_it/parser_inline.py b/markdown_it/parser_inline.py index a8228524..88140d3d 100644 --- a/markdown_it/parser_inline.py +++ b/markdown_it/parser_inline.py @@ -28,11 +28,18 @@ ("entity", rules_inline.entity), ] +# Note `rule2` ruleset was created specifically for emphasis/strikethrough +# post-processing and may be changed in the future. +# +# Don't use this for anything except pairs (plugins working with `balance_pairs`). +# _rules2: list[tuple[str, RuleFunc]] = [ ("balance_pairs", rules_inline.link_pairs), ("strikethrough", rules_inline.strikethrough.postProcess), ("emphasis", rules_inline.emphasis.postProcess), - ("text_collapse", rules_inline.text_collapse), + # rules for pairs separate '**' into its own text tokens, which may be left unused, + # rule below merges unused segments back with the rest of the text + ("fragments_join", rules_inline.fragments_join), ] diff --git a/markdown_it/presets/commonmark.py b/markdown_it/presets/commonmark.py index 60a39250..3990d434 100644 --- a/markdown_it/presets/commonmark.py +++ b/markdown_it/presets/commonmark.py @@ -40,7 +40,7 @@ def make() -> PresetType: "highlight": None, }, "components": { - "core": {"rules": ["normalize", "block", "inline"]}, + "core": {"rules": ["normalize", "block", "inline", "text_join"]}, "block": { "rules": [ "blockquote", @@ -68,7 +68,7 @@ def make() -> PresetType: "newline", "text", ], - "rules2": ["balance_pairs", "emphasis", "text_collapse"], + "rules2": ["balance_pairs", "emphasis", "fragments_join"], }, }, } diff --git a/markdown_it/presets/zero.py b/markdown_it/presets/zero.py index fcc5eb3a..2f69a58d 100644 --- a/markdown_it/presets/zero.py +++ b/markdown_it/presets/zero.py @@ -33,8 +33,11 @@ def make() -> PresetType: "highlight": None, }, "components": { - "core": {"rules": ["normalize", "block", "inline"]}, + "core": {"rules": ["normalize", "block", "inline", "text_join"]}, "block": {"rules": 
["paragraph"]}, - "inline": {"rules": ["text"], "rules2": ["balance_pairs", "text_collapse"]}, + "inline": { + "rules": ["text"], + "rules2": ["balance_pairs", "fragments_join"], + }, }, } diff --git a/markdown_it/rules_core/__init__.py b/markdown_it/rules_core/__init__.py index f80034c5..c9c5368c 100644 --- a/markdown_it/rules_core/__init__.py +++ b/markdown_it/rules_core/__init__.py @@ -6,6 +6,7 @@ "replace", "smartquotes", "linkify", + "text_join", ) from .block import block @@ -15,3 +16,4 @@ from .replacements import replace from .smartquotes import smartquotes from .state_core import StateCore +from .text_join import text_join diff --git a/markdown_it/rules_core/text_join.py b/markdown_it/rules_core/text_join.py new file mode 100644 index 00000000..d54ccbbc --- /dev/null +++ b/markdown_it/rules_core/text_join.py @@ -0,0 +1,34 @@ +"""Join raw text tokens with the rest of the text + +This is set as a separate rule to provide an opportunity for plugins +to run text replacements after text join, but before escape join. + +For example, `\\:)` shouldn't be replaced with an emoji. 
+""" +from __future__ import annotations + +from ..token import Token +from .state_core import StateCore + + +def text_join(state: StateCore) -> None: + """Join raw text for escape sequences (`text_special`) tokens with the rest of the text""" + + for inline_token in state.tokens[:]: + if inline_token.type != "inline": + continue + + # convert text_special to text and join all adjacent text nodes + new_tokens: list[Token] = [] + for child_token in inline_token.children or []: + if child_token.type == "text_special": + child_token.type = "text" + if ( + child_token.type == "text" + and new_tokens + and new_tokens[-1].type == "text" + ): + new_tokens[-1].content += child_token.content + else: + new_tokens.append(child_token) + inline_token.children = new_tokens diff --git a/markdown_it/rules_inline/__init__.py b/markdown_it/rules_inline/__init__.py index f27907ce..dde97d34 100644 --- a/markdown_it/rules_inline/__init__.py +++ b/markdown_it/rules_inline/__init__.py @@ -1,7 +1,7 @@ __all__ = ( "StateInline", "text", - "text_collapse", + "fragments_join", "link_pairs", "escape", "newline", @@ -20,10 +20,10 @@ from .balance_pairs import link_pairs from .entity import entity from .escape import escape +from .fragments_join import fragments_join from .html_inline import html_inline from .image import image from .link import link from .newline import newline from .state_inline import StateInline from .text import text -from .text_collapse import text_collapse diff --git a/markdown_it/rules_inline/escape.py b/markdown_it/rules_inline/escape.py index 8694cec1..9f68b5df 100644 --- a/markdown_it/rules_inline/escape.py +++ b/markdown_it/rules_inline/escape.py @@ -4,6 +4,58 @@ from ..common.utils import isStrSpace from .state_inline import StateInline + +def escape(state: StateInline, silent: bool) -> bool: + """Process escaped chars and hardbreaks.""" + pos = state.pos + maximum = state.posMax + + if state.src[pos] != "\\": + return False + + pos += 1 + + # '\' at the end of the 
inline block + if pos >= maximum: + return False + + ch1 = state.src[pos] + ch1_ord = ord(ch1) + if ch1 == "\n": + if not silent: + state.push("hardbreak", "br", 0) + pos += 1 + # skip leading whitespaces from next line + while pos < maximum: + ch = state.src[pos] + if not isStrSpace(ch): + break + pos += 1 + + state.pos = pos + return True + + escapedStr = state.src[pos] + + if ch1_ord >= 0xD800 and ch1_ord <= 0xDBFF and pos + 1 < maximum: + ch2 = state.src[pos + 1] + ch2_ord = ord(ch2) + if ch2_ord >= 0xDC00 and ch2_ord <= 0xDFFF: + escapedStr += ch2 + pos += 1 + + origStr = "\\" + escapedStr + + if not silent: + token = state.push("text_special", "", 0) + token.content = escapedStr if ch1 in _ESCAPED else origStr + token.markup = origStr + token.info = "escape" + + state.pos = pos + 1 + return True + + _ESCAPED = { "!", '"', @@ -38,42 +90,3 @@ "}", "~", } - - -def escape(state: StateInline, silent: bool) -> bool: - pos = state.pos - maximum = state.posMax - - if state.src[pos] != "\\": - return False - - pos += 1 - - if pos < maximum: - ch = state.src[pos] - - if ch in _ESCAPED: - if not silent: - state.pending += state.src[pos] - state.pos += 2 - return True - - if ch == "\n": - if not silent: - state.push("hardbreak", "br", 0) - - pos += 1 - # skip leading whitespaces from next line - while pos < maximum: - ch = state.src[pos] - if not isStrSpace(ch): - break - pos += 1 - - state.pos = pos - return True - - if not silent: - state.pending += "\\" - state.pos += 1 - return True diff --git a/markdown_it/rules_inline/text_collapse.py b/markdown_it/rules_inline/fragments_join.py similarity index 96% rename from markdown_it/rules_inline/text_collapse.py rename to markdown_it/rules_inline/fragments_join.py index e09289cf..f795c136 100644 --- a/markdown_it/rules_inline/text_collapse.py +++ b/markdown_it/rules_inline/fragments_join.py @@ -1,7 +1,7 @@ from .state_inline import StateInline -def text_collapse(state: StateInline) -> None: +def fragments_join(state: 
StateInline) -> None: """ Clean up tokens after emphasis and strikethrough postprocessing: merge adjacent text nodes into one and re-calculate all token levels diff --git a/tests/test_api/test_main.py b/tests/test_api/test_main.py index c3a9ac8b..64a2bbe8 100644 --- a/tests/test_api/test_main.py +++ b/tests/test_api/test_main.py @@ -13,6 +13,7 @@ def test_get_rules(): "linkify", "replacements", "smartquotes", + "text_join", ], "block": [ "table", @@ -40,7 +41,7 @@ def test_get_rules(): "html_inline", "entity", ], - "inline2": ["balance_pairs", "strikethrough", "emphasis", "text_collapse"], + "inline2": ["balance_pairs", "strikethrough", "emphasis", "fragments_join"], } @@ -48,13 +49,13 @@ def test_load_presets(): md = MarkdownIt("zero") assert md.get_active_rules() == { "block": ["paragraph"], - "core": ["normalize", "block", "inline"], + "core": ["normalize", "block", "inline", "text_join"], "inline": ["text"], - "inline2": ["balance_pairs", "text_collapse"], + "inline2": ["balance_pairs", "fragments_join"], } md = MarkdownIt("commonmark") assert md.get_active_rules() == { - "core": ["normalize", "block", "inline"], + "core": ["normalize", "block", "inline", "text_join"], "block": [ "code", "fence", @@ -79,7 +80,7 @@ def test_load_presets(): "html_inline", "entity", ], - "inline2": ["balance_pairs", "emphasis", "text_collapse"], + "inline2": ["balance_pairs", "emphasis", "fragments_join"], } @@ -94,16 +95,16 @@ def test_enable(): md = MarkdownIt("zero").enable("heading") assert md.get_active_rules() == { "block": ["heading", "paragraph"], - "core": ["normalize", "block", "inline"], + "core": ["normalize", "block", "inline", "text_join"], "inline": ["text"], - "inline2": ["balance_pairs", "text_collapse"], + "inline2": ["balance_pairs", "fragments_join"], } md.enable(["backticks", "autolink"]) assert md.get_active_rules() == { "block": ["heading", "paragraph"], - "core": ["normalize", "block", "inline"], + "core": ["normalize", "block", "inline", "text_join"], 
"inline": ["text", "backticks", "autolink"], - "inline2": ["balance_pairs", "text_collapse"], + "inline2": ["balance_pairs", "fragments_join"], } @@ -111,16 +112,16 @@ def test_disable(): md = MarkdownIt("zero").disable("inline") assert md.get_active_rules() == { "block": ["paragraph"], - "core": ["normalize", "block"], + "core": ["normalize", "block", "text_join"], "inline": ["text"], - "inline2": ["balance_pairs", "text_collapse"], + "inline2": ["balance_pairs", "fragments_join"], } md.disable(["text"]) assert md.get_active_rules() == { "block": ["paragraph"], - "core": ["normalize", "block"], + "core": ["normalize", "block", "text_join"], "inline": [], - "inline2": ["balance_pairs", "text_collapse"], + "inline2": ["balance_pairs", "fragments_join"], } @@ -130,15 +131,15 @@ def test_reset(): md.disable("inline") assert md.get_active_rules() == { "block": ["paragraph"], - "core": ["normalize", "block"], + "core": ["normalize", "block", "text_join"], "inline": ["text"], - "inline2": ["balance_pairs", "text_collapse"], + "inline2": ["balance_pairs", "fragments_join"], } assert md.get_active_rules() == { "block": ["paragraph"], - "core": ["normalize", "block", "inline"], + "core": ["normalize", "block", "inline", "text_join"], "inline": ["text"], - "inline2": ["balance_pairs", "text_collapse"], + "inline2": ["balance_pairs", "fragments_join"], } diff --git a/tests/test_port/fixtures/linkify.md b/tests/test_port/fixtures/linkify.md index 9edb78f3..c9755c03 100644 --- a/tests/test_port/fixtures/linkify.md +++ b/tests/test_port/fixtures/linkify.md @@ -96,4 +96,11 @@ after

    before

    github.com

    after

-.
\ No newline at end of file
+.
+
+Don't match escaped
+.
+google\.com
+.
+<p>google.com</p>
    +. diff --git a/tests/test_port/fixtures/smartquotes.md b/tests/test_port/fixtures/smartquotes.md index 70378b8e..e77175aa 100644 --- a/tests/test_port/fixtures/smartquotes.md +++ b/tests/test_port/fixtures/smartquotes.md @@ -163,4 +163,17 @@ Should parse quotes adjacent to inline html, #677: .

    “test


    test”

-.
\ No newline at end of file
+.
+
+Should be escapable:
+.
+"foo"
+
+\"foo"
+
+"foo\"
+.
+<p>“foo”</p>
+<p>&quot;foo&quot;</p>
+<p>&quot;foo&quot;</p>
    +. diff --git a/tests/test_port/fixtures/typographer.md b/tests/test_port/fixtures/typographer.md index 39154ed0..59e48941 100644 --- a/tests/test_port/fixtures/typographer.md +++ b/tests/test_port/fixtures/typographer.md @@ -81,6 +81,13 @@ dupes-ellipsis

    !.. ?.. ,… !!!.. ???.. ,…

 .
 
+copyright should be escapable
+.
+\(c)
+.
+<p>(c)</p>
    +. + dashes . @@ -101,6 +108,16 @@ markdownit--awesome

    markdownit–awesome

 .
 
+dashes should be escapable
+.
+foo \-- bar
+
+foo -\- bar
+.
+<p>foo -- bar</p>
+<p>foo -- bar</p>
    +. + regression tests for #624 . 1---2---3 From ea27cc86ca52c7ca1876fd1c550a518ecb61ecbe Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 2 Jun 2023 20:02:17 +0200 Subject: [PATCH 21/28] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Refactor:=20Add=20li?= =?UTF-8?q?nkifier=20rule=20to=20inline=20chain=20for=20full=20links=20(#2?= =?UTF-8?q?79)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes collision of emphasis and linkifier (so `http://example.org/foo._bar_-_baz` is now a single link, not emphasized). Emails and fuzzy links are not affected by this. Implements upstream: https://github.com/markdown-it/markdown-it/commit/6b58ec4245abe2e293c79bd7daabf4543ef46399 --- markdown_it/common/utils.py | 12 +++ markdown_it/parser_inline.py | 1 + markdown_it/presets/__init__.py | 2 +- markdown_it/rules_core/linkify.py | 66 ++++++++------ markdown_it/rules_inline/__init__.py | 2 + markdown_it/rules_inline/html_inline.py | 6 ++ markdown_it/rules_inline/link.py | 2 + markdown_it/rules_inline/linkify.py | 61 +++++++++++++ markdown_it/rules_inline/state_inline.py | 4 + tests/test_api/test_main.py | 1 + tests/test_port/fixtures/linkify.md | 107 +++++++++++++++++++++++ 11 files changed, 234 insertions(+), 30 deletions(-) create mode 100644 markdown_it/rules_inline/linkify.py diff --git a/markdown_it/common/utils.py b/markdown_it/common/utils.py index 6bf9a36f..0d11e3e3 100644 --- a/markdown_it/common/utils.py +++ b/markdown_it/common/utils.py @@ -304,3 +304,15 @@ def normalizeReference(string: str) -> str: # most notably, `__proto__`) # return string.lower().upper() + + +LINK_OPEN_RE = re.compile(r"^\s]", flags=re.IGNORECASE) +LINK_CLOSE_RE = re.compile(r"^", flags=re.IGNORECASE) + + +def isLinkOpen(string: str) -> bool: + return bool(LINK_OPEN_RE.search(string)) + + +def isLinkClose(string: str) -> bool: + return bool(LINK_CLOSE_RE.search(string)) diff --git a/markdown_it/parser_inline.py b/markdown_it/parser_inline.py index 
88140d3d..febe4e6e 100644 --- a/markdown_it/parser_inline.py +++ b/markdown_it/parser_inline.py @@ -16,6 +16,7 @@ # Parser rules _rules: list[tuple[str, RuleFunc]] = [ ("text", rules_inline.text), + ("linkify", rules_inline.linkify), ("newline", rules_inline.newline), ("escape", rules_inline.escape), ("backticks", rules_inline.backtick), diff --git a/markdown_it/presets/__init__.py b/markdown_it/presets/__init__.py index f1cb0507..1e6796a2 100644 --- a/markdown_it/presets/__init__.py +++ b/markdown_it/presets/__init__.py @@ -21,7 +21,7 @@ def make() -> PresetType: config = commonmark.make() config["components"]["core"]["rules"].append("linkify") config["components"]["block"]["rules"].append("table") - config["components"]["inline"]["rules"].append("strikethrough") + config["components"]["inline"]["rules"].extend(["strikethrough", "linkify"]) config["components"]["inline"]["rules2"].append("strikethrough") config["options"]["linkify"] = True config["options"]["html"] = True diff --git a/markdown_it/rules_core/linkify.py b/markdown_it/rules_core/linkify.py index 49bb4ef3..efbc9d4c 100644 --- a/markdown_it/rules_core/linkify.py +++ b/markdown_it/rules_core/linkify.py @@ -1,41 +1,32 @@ +from __future__ import annotations + import re +from typing import Protocol -from ..common.utils import arrayReplaceAt +from ..common.utils import arrayReplaceAt, isLinkClose, isLinkOpen from ..token import Token from .state_core import StateCore -LINK_OPEN_RE = re.compile(r"^\s]", flags=re.IGNORECASE) -LINK_CLOSE_RE = re.compile(r"^", flags=re.IGNORECASE) - HTTP_RE = re.compile(r"^http://") MAILTO_RE = re.compile(r"^mailto:") TEST_MAILTO_RE = re.compile(r"^mailto:", flags=re.IGNORECASE) -def isLinkOpen(string: str) -> bool: - return bool(LINK_OPEN_RE.search(string)) - - -def isLinkClose(string: str) -> bool: - return bool(LINK_CLOSE_RE.search(string)) - - def linkify(state: StateCore) -> None: - blockTokens = state.tokens - + """Rule for identifying plain-text links.""" if not 
state.md.options.linkify: return if not state.md.linkify: raise ModuleNotFoundError("Linkify enabled but not installed.") - for j in range(len(blockTokens)): - if blockTokens[j].type != "inline" or not state.md.linkify.pretest( - blockTokens[j].content + for inline_token in state.tokens: + if inline_token.type != "inline" or not state.md.linkify.pretest( + inline_token.content ): continue - tokens = blockTokens[j].children + tokens = inline_token.children htmlLinkLevel = 0 @@ -71,38 +62,47 @@ def linkify(state: StateCore) -> None: currentToken.content ): text = currentToken.content - links = state.md.linkify.match(text) + links: list[_LinkType] = state.md.linkify.match(text) or [] # Now split string to nodes nodes = [] level = currentToken.level lastPos = 0 - for ln in range(len(links)): - url = links[ln].url + # forbid escape sequence at the start of the string, + # this avoids http\://example.com/ from being linkified as + # http://example.com/ + if ( + links + and links[0].index == 0 + and i > 0 + and tokens[i - 1].type == "text_special" + ): + links = links[1:] + + for link in links: + url = link.url fullUrl = state.md.normalizeLink(url) if not state.md.validateLink(fullUrl): continue - urlText = links[ln].text + urlText = link.text # Linkifier might send raw hostnames like "example.com", where url # starts with domain name. So we prepend http:// in those cases, # and remove it afterwards. 
- if not links[ln].schema: + if not link.schema: urlText = HTTP_RE.sub( "", state.md.normalizeLinkText("http://" + urlText) ) - elif links[ln].schema == "mailto:" and TEST_MAILTO_RE.search( - urlText - ): + elif link.schema == "mailto:" and TEST_MAILTO_RE.search(urlText): urlText = MAILTO_RE.sub( "", state.md.normalizeLinkText("mailto:" + urlText) ) else: urlText = state.md.normalizeLinkText(urlText) - pos = links[ln].index + pos = link.index if pos > lastPos: token = Token("text", "", 0) @@ -130,7 +130,7 @@ def linkify(state: StateCore) -> None: token.info = "auto" nodes.append(token) - lastPos = links[ln].last_index + lastPos = link.last_index if lastPos < len(text): token = Token("text", "", 0) @@ -138,4 +138,12 @@ def linkify(state: StateCore) -> None: token.level = level nodes.append(token) - blockTokens[j].children = tokens = arrayReplaceAt(tokens, i, nodes) + inline_token.children = tokens = arrayReplaceAt(tokens, i, nodes) + + +class _LinkType(Protocol): + url: str + text: str + index: int + last_index: int + schema: str | None diff --git a/markdown_it/rules_inline/__init__.py b/markdown_it/rules_inline/__init__.py index dde97d34..3a8026ec 100644 --- a/markdown_it/rules_inline/__init__.py +++ b/markdown_it/rules_inline/__init__.py @@ -3,6 +3,7 @@ "text", "fragments_join", "link_pairs", + "linkify", "escape", "newline", "backtick", @@ -24,6 +25,7 @@ from .html_inline import html_inline from .image import image from .link import link +from .linkify import linkify from .newline import newline from .state_inline import StateInline from .text import text diff --git a/markdown_it/rules_inline/html_inline.py b/markdown_it/rules_inline/html_inline.py index 3c8b5331..9065e1d0 100644 --- a/markdown_it/rules_inline/html_inline.py +++ b/markdown_it/rules_inline/html_inline.py @@ -1,5 +1,6 @@ # Process html tags from ..common.html_re import HTML_TAG_RE +from ..common.utils import isLinkClose, isLinkOpen from .state_inline import StateInline @@ -33,5 +34,10 @@ def 
html_inline(state: StateInline, silent: bool) -> bool: token = state.push("html_inline", "", 0) token.content = state.src[pos : pos + len(match.group(0))] + if isLinkOpen(token.content): + state.linkLevel += 1 + if isLinkClose(token.content): + state.linkLevel -= 1 + state.pos += len(match.group(0)) return True diff --git a/markdown_it/rules_inline/link.py b/markdown_it/rules_inline/link.py index 18c0736c..78cf9122 100644 --- a/markdown_it/rules_inline/link.py +++ b/markdown_it/rules_inline/link.py @@ -140,7 +140,9 @@ def link(state: StateInline, silent: bool) -> bool: if label and state.md.options.get("store_labels", False): token.meta["label"] = label + state.linkLevel += 1 state.md.inline.tokenize(state) + state.linkLevel -= 1 token = state.push("link_close", "a", -1) diff --git a/markdown_it/rules_inline/linkify.py b/markdown_it/rules_inline/linkify.py new file mode 100644 index 00000000..a8a18153 --- /dev/null +++ b/markdown_it/rules_inline/linkify.py @@ -0,0 +1,61 @@ +"""Process links like https://example.org/""" +import re + +from .state_inline import StateInline + +# RFC3986: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." 
) +SCHEME_RE = re.compile(r"(?:^|[^a-z0-9.+-])([a-z][a-z0-9.+-]*)$", re.IGNORECASE) + + +def linkify(state: StateInline, silent: bool) -> bool: + """Rule for identifying plain-text links.""" + if not state.md.options.linkify: + return False + if state.linkLevel > 0: + return False + if not state.md.linkify: + raise ModuleNotFoundError("Linkify enabled but not installed.") + + pos = state.pos + maximum = state.posMax + + if ( + (pos + 3) > maximum + or state.src[pos] != ":" + or state.src[pos + 1] != "/" + or state.src[pos + 2] != "/" + ): + return False + + if not (match := SCHEME_RE.match(state.pending)): + return False + + proto = match.group(1) + if not (link := state.md.linkify.match_at_start(state.src[pos - len(proto) :])): + return False + url: str = link.url + + # disallow '*' at the end of the link (conflicts with emphasis) + url = url.rstrip("*") + + full_url = state.md.normalizeLink(url) + if not state.md.validateLink(full_url): + return False + + if not silent: + state.pending = state.pending[: -len(proto)] + + token = state.push("link_open", "a", 1) + token.attrs = {"href": full_url} + token.markup = "linkify" + token.info = "auto" + + token = state.push("text", "", 0) + token.content = state.md.normalizeLinkText(url) + + token = state.push("link_close", "a", -1) + token.markup = "linkify" + token.info = "auto" + + state.pos += len(url) - len(proto) + return True diff --git a/markdown_it/rules_inline/state_inline.py b/markdown_it/rules_inline/state_inline.py index 143ab33e..c0c491c4 100644 --- a/markdown_it/rules_inline/state_inline.py +++ b/markdown_it/rules_inline/state_inline.py @@ -70,6 +70,10 @@ def __init__( self.backticks: dict[int, int] = {} self.backticksScanned = False + # Counter used to disable inline linkify-it execution + # inside and markdown links + self.linkLevel = 0 + def __repr__(self) -> str: return ( f"{self.__class__.__name__}" diff --git a/tests/test_api/test_main.py b/tests/test_api/test_main.py index 64a2bbe8..178d717e 100644 
--- a/tests/test_api/test_main.py +++ b/tests/test_api/test_main.py @@ -30,6 +30,7 @@ def test_get_rules(): ], "inline": [ "text", + "linkify", "newline", "escape", "backticks", diff --git a/tests/test_port/fixtures/linkify.md b/tests/test_port/fixtures/linkify.md index c9755c03..f51bb6b9 100644 --- a/tests/test_port/fixtures/linkify.md +++ b/tests/test_port/fixtures/linkify.md @@ -29,6 +29,84 @@ don't touch text in html tags

    https://example.com

    . +entities inside raw links +. +https://example.com/foo&bar +. +

    https://example.com/foo&amp;bar

    +. + + +emphasis inside raw links (asterisk, can happen in links with params) +. +https://example.com/foo*bar*baz +. +

    https://example.com/foo*bar*baz

    +. + + +emphasis inside raw links (underscore) +. +http://example.org/foo._bar_-_baz +. +

    http://example.org/foo._bar_-_baz

    +. + + +backticks inside raw links +. +https://example.com/foo`bar`baz +. +

    https://example.com/foo`bar`baz

    +. + + +links inside raw links +. +https://example.com/foo[123](456)bar +. +

    https://example.com/foo[123](456)bar

    +. + + +escapes not allowed at the start +. +\https://example.com +. +

    \https://example.com

    +. + + +escapes not allowed at comma +. +https\://example.com +. +

    https://example.com

    +. + + +escapes not allowed at slashes +. +https:\//aa.org https://bb.org +. +

    https://aa.org https://bb.org

    +. + + +fuzzy link shouldn't match cc.org +. +https:/\/cc.org +. +

    https://cc.org

    +. + + +bold links (exclude markup of pairs from link tail) +. +**http://example.com/foobar** +. +

    http://example.com/foobar

    +. match links without protocol . @@ -37,6 +115,35 @@ www.example.org

    www.example.org

    . +coverage, prefix not valid +. +http:/example.com/ +. +

    http:/example.com/

    +. + + +coverage, negative link level +. +[https://example.com](https://example.com) +. +

    https://example.com

    +. + + +emphasis with '*', real link: +. +http://cdecl.ridiculousfish.com/?q=int+%28*f%29+%28float+*%29%3B +. +

    http://cdecl.ridiculousfish.com/?q=int+(*f)+(float+*)%3B

    +. + +emphasis with '_', real link: +. +https://www.sell.fi/sites/default/files/elainlaakarilehti/tieteelliset_artikkelit/kahkonen_t._et_al.canine_pancreatitis-_review.pdf +. +

    https://www.sell.fi/sites/default/files/elainlaakarilehti/tieteelliset_artikkelit/kahkonen_t._et_al.canine_pancreatitis-_review.pdf

    +. emails . From d5b3874539f50faf435ecb81355546cd8a63dd40 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 2 Jun 2023 20:46:32 +0200 Subject: [PATCH 22/28] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Parse=20entities=20t?= =?UTF-8?q?o=20`text=5Fspecial`=20token=20(#280)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather than adding directly to text. The `text_join` core rule then joins it to the text later, but after typographic rules have been applied. Implements upstream: https://github.com/markdown-it/markdown-it/commita/3fc0deb38b5a8b2eb8f46c727cc4e299e5ae5f9c --- markdown_it/rules_inline/entity.py | 67 +++++++++++++------------ tests/test_port/fixtures/smartquotes.md | 13 +++++ tests/test_port/fixtures/typographer.md | 7 +++ 3 files changed, 55 insertions(+), 32 deletions(-) diff --git a/markdown_it/rules_inline/entity.py b/markdown_it/rules_inline/entity.py index d3b5f6bb..ec9d3965 100644 --- a/markdown_it/rules_inline/entity.py +++ b/markdown_it/rules_inline/entity.py @@ -16,35 +16,38 @@ def entity(state: StateInline, silent: bool) -> bool: if state.src[pos] != "&": return False - if (pos + 1) < maximum: - if state.src[pos + 1] == "#": - match = DIGITAL_RE.search(state.src[pos:]) - if match: - if not silent: - match1 = match.group(1) - code = ( - int(match1[1:], 16) - if match1[0].lower() == "x" - else int(match1, 10) - ) - state.pending += ( - fromCodePoint(code) - if isValidEntityCode(code) - else fromCodePoint(0xFFFD) - ) - - state.pos += len(match.group(0)) - return True - - else: - match = NAMED_RE.search(state.src[pos:]) - if match and match.group(1) in entities: - if not silent: - state.pending += entities[match.group(1)] - state.pos += len(match.group(0)) - return True - - if not silent: - state.pending += "&" - state.pos += 1 - return True + if pos + 1 >= maximum: + return False + + if state.src[pos + 1] == "#": + if match := DIGITAL_RE.search(state.src[pos:]): + if not silent: + match1 = match.group(1) + 
code = ( + int(match1[1:], 16) if match1[0].lower() == "x" else int(match1, 10) + ) + + token = state.push("text_special", "", 0) + token.content = ( + fromCodePoint(code) + if isValidEntityCode(code) + else fromCodePoint(0xFFFD) + ) + token.markup = match.group(0) + token.info = "entity" + + state.pos += len(match.group(0)) + return True + + else: + if (match := NAMED_RE.search(state.src[pos:])) and match.group(1) in entities: + if not silent: + token = state.push("text_special", "", 0) + token.content = entities[match.group(1)] + token.markup = match.group(0) + token.info = "entity" + + state.pos += len(match.group(0)) + return True + + return False diff --git a/tests/test_port/fixtures/smartquotes.md b/tests/test_port/fixtures/smartquotes.md index e77175aa..8ed314e2 100644 --- a/tests/test_port/fixtures/smartquotes.md +++ b/tests/test_port/fixtures/smartquotes.md @@ -177,3 +177,16 @@ Should be escapable:

    "foo"

    "foo"

    . + +Should not replace entities: +. +"foo" + +"foo" + +"foo" +. +

    "foo"

    +

    "foo"

    +

    "foo"

    +. diff --git a/tests/test_port/fixtures/typographer.md b/tests/test_port/fixtures/typographer.md index 59e48941..d72a7c2f 100644 --- a/tests/test_port/fixtures/typographer.md +++ b/tests/test_port/fixtures/typographer.md @@ -130,3 +130,10 @@ regression tests for #624

    1–2–3

    1 – – 3

    . + +shouldn't replace entities +. +(c) (c) (c) +. +

    (c) (c) ©

    +. From dd51f6222ed5e93ecfcf4514f67c71be99430011 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 2 Jun 2023 21:18:21 +0200 Subject: [PATCH 23/28] =?UTF-8?q?=E2=80=BC=EF=B8=8F=20Remove=20(p)=20=3D>?= =?UTF-8?q?=20=C2=A7=20replacement=20in=20typographer=20(#281)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `(p)` is no longer replaced with § by typographer (conflicts with ℗) Implements upstream: https://github.com/markdown-it/markdown-it/commit/f52351499be1e6c838110c31e07154cce1d91d47 --- markdown_it/port.yaml | 6 +++--- markdown_it/rules_core/replacements.py | 9 ++++----- tests/test_port/fixtures/typographer.md | 9 --------- 3 files changed, 7 insertions(+), 17 deletions(-) diff --git a/markdown_it/port.yaml b/markdown_it/port.yaml index a553fe1a..3e289e9e 100644 --- a/markdown_it/port.yaml +++ b/markdown_it/port.yaml @@ -1,7 +1,7 @@ - package: markdown-it/markdown-it - version: 12.3.2 - commit: d72c68b520cedacae7878caa92bf7fe32e3e0e6f - date: Jan 8, 2022 + version: 13.0.1 + commit: e843acc9edad115cbf8cf85e676443f01658be08 + date: May 3, 2022 notes: - Rename variables that use python built-in names, e.g. - `max` -> `maximum` diff --git a/markdown_it/rules_core/replacements.py b/markdown_it/rules_core/replacements.py index 0b6e86af..14912e17 100644 --- a/markdown_it/rules_core/replacements.py +++ b/markdown_it/rules_core/replacements.py @@ -3,7 +3,6 @@ * ``(c)``, ``(C)`` → © * ``(tm)``, ``(TM)`` → ™ * ``(r)``, ``(R)`` → ® -* ``(p)``, ``(P)`` → § * ``+-`` → ± * ``...`` → … * ``?....`` → ?.. 
@@ -26,15 +25,15 @@ # TODO: # - fractionals 1/2, 1/4, 3/4 -> ½, ¼, ¾ -# - miltiplication 2 x 4 -> 2 × 4 +# - multiplication 2 x 4 -> 2 × 4 RARE_RE = re.compile(r"\+-|\.\.|\?\?\?\?|!!!!|,,|--") # Workaround for phantomjs - need regex without /g flag, # or root check will fail every second time -# SCOPED_ABBR_TEST_RE = r"\((c|tm|r|p)\)" +# SCOPED_ABBR_TEST_RE = r"\((c|tm|r)\)" -SCOPED_ABBR_RE = re.compile(r"\((c|tm|r|p)\)", flags=re.IGNORECASE) +SCOPED_ABBR_RE = re.compile(r"\((c|tm|r)\)", flags=re.IGNORECASE) PLUS_MINUS_RE = re.compile(r"\+-") @@ -53,7 +52,7 @@ EN_DASH_INDENT_RE = re.compile(r"(^|[^-\s])--(?=[^-\s]|$)", flags=re.MULTILINE) -SCOPED_ABBR = {"c": "©", "r": "®", "p": "§", "tm": "™"} +SCOPED_ABBR = {"c": "©", "r": "®", "tm": "™"} def replaceFn(match: re.Match[str]) -> str: diff --git a/tests/test_port/fixtures/typographer.md b/tests/test_port/fixtures/typographer.md index d72a7c2f..23825e5d 100644 --- a/tests/test_port/fixtures/typographer.md +++ b/tests/test_port/fixtures/typographer.md @@ -41,15 +41,6 @@ trademark

    ™ ™

    . - -paragraph -. -(p) (P) -. -

    § §

    -. - - plus-minus . +-5 From 64965cfdac910a30a6a886ab3e5e710fa2649360 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 2 Jun 2023 21:27:40 +0200 Subject: [PATCH 24/28] =?UTF-8?q?=F0=9F=91=8C=20Show=20text=20of=20`text?= =?UTF-8?q?=5Fspecial`=20in=20`tree.pretty`=20(#282)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provides a demonstration of how the new `text_special` token parsing works --- markdown_it/tree.py | 7 ++++++- tests/test_tree.py | 7 +++++++ tests/test_tree/test_pretty_text_special.xml | 11 +++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 tests/test_tree/test_pretty_text_special.xml diff --git a/markdown_it/tree.py b/markdown_it/tree.py index a39ba32a..6641e5a4 100644 --- a/markdown_it/tree.py +++ b/markdown_it/tree.py @@ -229,7 +229,12 @@ def pretty( if not self.is_root and self.attrs: text += " " + " ".join(f"{k}={v!r}" for k, v in self.attrs.items()) text += ">" - if show_text and not self.is_root and self.type == "text" and self.content: + if ( + show_text + and not self.is_root + and self.type in ("text", "text_special") + and self.content + ): text += "\n" + textwrap.indent(self.content, prefix + " " * indent) for child in self.children: text += "\n" + child.pretty( diff --git a/tests/test_tree.py b/tests/test_tree.py index c5203b0b..36bd0b67 100644 --- a/tests/test_tree.py +++ b/tests/test_tree.py @@ -76,6 +76,13 @@ def test_pretty(file_regression): file_regression.check(node.pretty(indent=2, show_text=True), extension=".xml") +def test_pretty_text_special(file_regression): + md = MarkdownIt() + md.disable("text_join") + tree = SyntaxTreeNode(md.parse("foo © bar \\(")) + file_regression.check(tree.pretty(show_text=True), extension=".xml") + + def test_walk(): tokens = MarkdownIt().parse(EXAMPLE_MARKDOWN) tree = SyntaxTreeNode(tokens) diff --git a/tests/test_tree/test_pretty_text_special.xml b/tests/test_tree/test_pretty_text_special.xml new file mode 100644 
index 00000000..211d790c --- /dev/null +++ b/tests/test_tree/test_pretty_text_special.xml @@ -0,0 +1,11 @@ + + + + + foo + + © + + bar + + ( \ No newline at end of file From 90b367d9c340cb6db1cf3e0ebf96127ddc72497f Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 2 Jun 2023 22:47:54 +0200 Subject: [PATCH 25/28] =?UTF-8?q?=F0=9F=94=A7=20Add=20typing=20of=20rule?= =?UTF-8?q?=20functions=20(#283)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rule functions signature is specific to the state it acts on. --- docs/conf.py | 1 - markdown_it/parser_block.py | 12 +++++-- markdown_it/parser_core.py | 10 ++++-- markdown_it/parser_inline.py | 19 +++++++---- markdown_it/ruler.py | 45 +++++++++++++------------- markdown_it/rules_block/lheading.py | 3 +- markdown_it/rules_block/paragraph.py | 3 +- tests/test_api/test_plugin_creation.py | 20 ++++++------ 8 files changed, 65 insertions(+), 48 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 2b48df1e..4fa12262 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -53,7 +53,6 @@ ".*Literal.*", ".*_Result", "EnvType", - "RuleFunc", "Path", "Ellipsis", ) diff --git a/markdown_it/parser_block.py b/markdown_it/parser_block.py index 86f08cf5..32749127 100644 --- a/markdown_it/parser_block.py +++ b/markdown_it/parser_block.py @@ -2,7 +2,7 @@ from __future__ import annotations import logging -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Callable from . import rules_block from .ruler import Ruler @@ -16,7 +16,13 @@ LOGGER = logging.getLogger(__name__) -_rules: list[tuple[str, Any, list[str]]] = [ +RuleFuncBlockType = Callable[[StateBlock, int, int, bool], bool] +"""(state: StateBlock, startLine: int, endLine: int, silent: bool) -> matched: bool) + +`silent` disables token generation, useful for lookahead. +""" + +_rules: list[tuple[str, RuleFuncBlockType, list[str]]] = [ # First 2 params - rule name & source. 
Secondary array - list of rules, # which can be terminated by this one. ("table", rules_block.table, ["paragraph", "reference"]), @@ -45,7 +51,7 @@ class ParserBlock: """ def __init__(self) -> None: - self.ruler = Ruler() + self.ruler = Ruler[RuleFuncBlockType]() for name, rule, alt in _rules: self.ruler.push(name, rule, {"alt": alt}) diff --git a/markdown_it/parser_core.py b/markdown_it/parser_core.py index b3eb8abe..ca5ab256 100644 --- a/markdown_it/parser_core.py +++ b/markdown_it/parser_core.py @@ -6,7 +6,9 @@ """ from __future__ import annotations -from .ruler import RuleFunc, Ruler +from typing import Callable + +from .ruler import Ruler from .rules_core import ( block, inline, @@ -18,7 +20,9 @@ ) from .rules_core.state_core import StateCore -_rules: list[tuple[str, RuleFunc]] = [ +RuleFuncCoreType = Callable[[StateCore], None] + +_rules: list[tuple[str, RuleFuncCoreType]] = [ ("normalize", normalize), ("block", block), ("inline", inline), @@ -31,7 +35,7 @@ class ParserCore: def __init__(self) -> None: - self.ruler = Ruler() + self.ruler = Ruler[RuleFuncCoreType]() for name, rule in _rules: self.ruler.push(name, rule) diff --git a/markdown_it/parser_inline.py b/markdown_it/parser_inline.py index febe4e6e..0026c383 100644 --- a/markdown_it/parser_inline.py +++ b/markdown_it/parser_inline.py @@ -2,10 +2,10 @@ """ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Callable from . import rules_inline -from .ruler import RuleFunc, Ruler +from .ruler import Ruler from .rules_inline.state_inline import StateInline from .token import Token from .utils import EnvType @@ -13,8 +13,14 @@ if TYPE_CHECKING: from markdown_it import MarkdownIt + # Parser rules -_rules: list[tuple[str, RuleFunc]] = [ +RuleFuncInlineType = Callable[[StateInline, bool], bool] +"""(state: StateInline, silent: bool) -> matched: bool) + +`silent` disables token generation, useful for lookahead. 
+""" +_rules: list[tuple[str, RuleFuncInlineType]] = [ ("text", rules_inline.text), ("linkify", rules_inline.linkify), ("newline", rules_inline.newline), @@ -34,7 +40,8 @@ # # Don't use this for anything except pairs (plugins working with `balance_pairs`). # -_rules2: list[tuple[str, RuleFunc]] = [ +RuleFuncInline2Type = Callable[[StateInline], None] +_rules2: list[tuple[str, RuleFuncInline2Type]] = [ ("balance_pairs", rules_inline.link_pairs), ("strikethrough", rules_inline.strikethrough.postProcess), ("emphasis", rules_inline.emphasis.postProcess), @@ -46,11 +53,11 @@ class ParserInline: def __init__(self) -> None: - self.ruler = Ruler() + self.ruler = Ruler[RuleFuncInlineType]() for name, rule in _rules: self.ruler.push(name, rule) # Second ruler used for post-processing (e.g. in emphasis-like rules) - self.ruler2 = Ruler() + self.ruler2 = Ruler[RuleFuncInline2Type]() for name, rule2 in _rules2: self.ruler2.push(name, rule2) diff --git a/markdown_it/ruler.py b/markdown_it/ruler.py index 9849561d..bd8baba3 100644 --- a/markdown_it/ruler.py +++ b/markdown_it/ruler.py @@ -17,9 +17,9 @@ class Ruler """ from __future__ import annotations -from collections.abc import Callable, Iterable +from collections.abc import Iterable from dataclasses import dataclass, field -from typing import TYPE_CHECKING, TypedDict +from typing import TYPE_CHECKING, Generic, TypedDict, TypeVar import warnings from markdown_it._compat import DATACLASS_KWARGS @@ -57,33 +57,30 @@ def srcCharCode(self) -> tuple[int, ...]: return self._srcCharCode -# The first positional arg is always a subtype of `StateBase`. Other -# arguments may or may not exist, based on the rule's type (block, -# core, inline). Return type is either `None` or `bool` based on the -# rule's type. 
-RuleFunc = Callable # type: ignore - - class RuleOptionsType(TypedDict, total=False): alt: list[str] +RuleFuncTv = TypeVar("RuleFuncTv") +"""A rule function, whose signature is dependent on the state type.""" + + @dataclass(**DATACLASS_KWARGS) -class Rule: +class Rule(Generic[RuleFuncTv]): name: str enabled: bool - fn: RuleFunc = field(repr=False) + fn: RuleFuncTv = field(repr=False) alt: list[str] -class Ruler: +class Ruler(Generic[RuleFuncTv]): def __init__(self) -> None: # List of added rules. - self.__rules__: list[Rule] = [] + self.__rules__: list[Rule[RuleFuncTv]] = [] # Cached rule chains. # First level - chain name, '' for default. # Second level - diginal anchor for fast filtering by charcodes. - self.__cache__: dict[str, list[RuleFunc]] | None = None + self.__cache__: dict[str, list[RuleFuncTv]] | None = None def __find__(self, name: str) -> int: """Find rule index by name""" @@ -112,7 +109,7 @@ def __compile__(self) -> None: self.__cache__[chain].append(rule.fn) def at( - self, ruleName: str, fn: RuleFunc, options: RuleOptionsType | None = None + self, ruleName: str, fn: RuleFuncTv, options: RuleOptionsType | None = None ) -> None: """Replace rule by name with new function & options. @@ -133,7 +130,7 @@ def before( self, beforeName: str, ruleName: str, - fn: RuleFunc, + fn: RuleFuncTv, options: RuleOptionsType | None = None, ) -> None: """Add new rule to chain before one with given name. @@ -148,14 +145,16 @@ def before( options = options or {} if index == -1: raise KeyError(f"Parser rule not found: {beforeName}") - self.__rules__.insert(index, Rule(ruleName, True, fn, options.get("alt", []))) + self.__rules__.insert( + index, Rule[RuleFuncTv](ruleName, True, fn, options.get("alt", [])) + ) self.__cache__ = None def after( self, afterName: str, ruleName: str, - fn: RuleFunc, + fn: RuleFuncTv, options: RuleOptionsType | None = None, ) -> None: """Add new rule to chain after one with given name. 
@@ -171,12 +170,12 @@ def after( if index == -1: raise KeyError(f"Parser rule not found: {afterName}") self.__rules__.insert( - index + 1, Rule(ruleName, True, fn, options.get("alt", [])) + index + 1, Rule[RuleFuncTv](ruleName, True, fn, options.get("alt", [])) ) self.__cache__ = None def push( - self, ruleName: str, fn: RuleFunc, options: RuleOptionsType | None = None + self, ruleName: str, fn: RuleFuncTv, options: RuleOptionsType | None = None ) -> None: """Push new rule to the end of chain. @@ -185,7 +184,9 @@ def push( :param options: new rule options (not mandatory). """ - self.__rules__.append(Rule(ruleName, True, fn, (options or {}).get("alt", []))) + self.__rules__.append( + Rule[RuleFuncTv](ruleName, True, fn, (options or {}).get("alt", [])) + ) self.__cache__ = None def enable( @@ -252,7 +253,7 @@ def disable( self.__cache__ = None return result - def getRules(self, chainName: str) -> list[RuleFunc]: + def getRules(self, chainName: str = "") -> list[RuleFuncTv]: """Return array of active functions (rules) for given chain name. It analyzes rules configuration, compiles caches if not exists and returns result. 
diff --git a/markdown_it/rules_block/lheading.py b/markdown_it/rules_block/lheading.py index fbd50699..3522207a 100644 --- a/markdown_it/rules_block/lheading.py +++ b/markdown_it/rules_block/lheading.py @@ -1,7 +1,6 @@ # lheading (---, ==) import logging -from ..ruler import Ruler from .state_block import StateBlock LOGGER = logging.getLogger(__name__) @@ -12,7 +11,7 @@ def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> b level = None nextLine = startLine + 1 - ruler: Ruler = state.md.block.ruler + ruler = state.md.block.ruler terminatorRules = ruler.getRules("paragraph") if state.is_code_block(startLine): diff --git a/markdown_it/rules_block/paragraph.py b/markdown_it/rules_block/paragraph.py index 3c7d43d3..5388a4b1 100644 --- a/markdown_it/rules_block/paragraph.py +++ b/markdown_it/rules_block/paragraph.py @@ -1,7 +1,6 @@ """Paragraph.""" import logging -from ..ruler import Ruler from .state_block import StateBlock LOGGER = logging.getLogger(__name__) @@ -13,7 +12,7 @@ def paragraph(state: StateBlock, startLine: int, endLine: int, silent: bool) -> ) nextLine = startLine + 1 - ruler: Ruler = state.md.block.ruler + ruler = state.md.block.ruler terminatorRules = ruler.getRules("paragraph") endLine = state.lineMax diff --git a/tests/test_api/test_plugin_creation.py b/tests/test_api/test_plugin_creation.py index 3a9af8bb..d5bda748 100644 --- a/tests/test_api/test_plugin_creation.py +++ b/tests/test_api/test_plugin_creation.py @@ -6,10 +6,11 @@ def inline_rule(state, silent): print("plugin called") + return False def test_inline_after(capsys): - def _plugin(_md): + def _plugin(_md: MarkdownIt) -> None: _md.inline.ruler.after("text", "new_rule", inline_rule) MarkdownIt().use(_plugin).parse("[") @@ -17,7 +18,7 @@ def _plugin(_md): def test_inline_before(capsys): - def _plugin(_md): + def _plugin(_md: MarkdownIt) -> None: _md.inline.ruler.before("text", "new_rule", inline_rule) MarkdownIt().use(_plugin).parse("a") @@ -25,7 +26,7 @@ def 
_plugin(_md): def test_inline_at(capsys): - def _plugin(_md): + def _plugin(_md: MarkdownIt) -> None: _md.inline.ruler.at("text", inline_rule) MarkdownIt().use(_plugin).parse("a") @@ -34,10 +35,11 @@ def _plugin(_md): def block_rule(state, startLine, endLine, silent): print("plugin called") + return False def test_block_after(capsys): - def _plugin(_md): + def _plugin(_md: MarkdownIt) -> None: _md.block.ruler.after("hr", "new_rule", block_rule) MarkdownIt().use(_plugin).parse("a") @@ -45,7 +47,7 @@ def _plugin(_md): def test_block_before(capsys): - def _plugin(_md): + def _plugin(_md: MarkdownIt) -> None: _md.block.ruler.before("hr", "new_rule", block_rule) MarkdownIt().use(_plugin).parse("a") @@ -53,7 +55,7 @@ def _plugin(_md): def test_block_at(capsys): - def _plugin(_md): + def _plugin(_md: MarkdownIt) -> None: _md.block.ruler.at("hr", block_rule) MarkdownIt().use(_plugin).parse("a") @@ -65,7 +67,7 @@ def core_rule(state): def test_core_after(capsys): - def _plugin(_md): + def _plugin(_md: MarkdownIt) -> None: _md.core.ruler.after("normalize", "new_rule", core_rule) MarkdownIt().use(_plugin).parse("a") @@ -73,7 +75,7 @@ def _plugin(_md): def test_core_before(capsys): - def _plugin(_md): + def _plugin(_md: MarkdownIt) -> None: _md.core.ruler.before("normalize", "new_rule", core_rule) MarkdownIt().use(_plugin).parse("a") @@ -81,7 +83,7 @@ def _plugin(_md): def test_core_at(capsys): - def _plugin(_md): + def _plugin(_md: MarkdownIt) -> None: _md.core.ruler.at("normalize", core_rule) MarkdownIt().use(_plugin).parse("a") From 4c3a34de7544ea8ef2cc0e752bf7d246b9a71d22 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 2 Jun 2023 23:45:14 +0200 Subject: [PATCH 26/28] =?UTF-8?q?=E2=80=BC=EF=B8=8F=20Remove=20unused=20`s?= =?UTF-8?q?ilent`=20arg=20in=20`ParserBlock.tokenize`=20(#284)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .readthedocs.yml | 1 - markdown_it/parser_block.py | 4 +--- pyproject.toml | 2 +- 3 files 
changed, 2 insertions(+), 5 deletions(-) diff --git a/.readthedocs.yml b/.readthedocs.yml index 1d750008..611695db 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -7,7 +7,6 @@ python: path: . extra_requirements: - linkify - - plugins - rtd sphinx: diff --git a/markdown_it/parser_block.py b/markdown_it/parser_block.py index 32749127..72360f9b 100644 --- a/markdown_it/parser_block.py +++ b/markdown_it/parser_block.py @@ -55,9 +55,7 @@ def __init__(self) -> None: for name, rule, alt in _rules: self.ruler.push(name, rule, {"alt": alt}) - def tokenize( - self, state: StateBlock, startLine: int, endLine: int, silent: bool = False - ) -> None: + def tokenize(self, state: StateBlock, startLine: int, endLine: int) -> None: """Generate tokens for input range.""" rules = self.ruler.getRules("") line = startLine diff --git a/pyproject.toml b/pyproject.toml index b0d64fb1..ea7cd036 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,7 +45,7 @@ compare = [ linkify = ["linkify-it-py>=1,<3"] plugins = ["mdit-py-plugins"] rtd = [ - "attrs", + "mdit-py-plugins @ git+https://github.com/executablebooks/mdit-py-plugins@master", "myst-parser", "pyyaml", "sphinx", From e146728ce5b1efe52c7a75c67abc2520c07f00bb Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Sat, 3 Jun 2023 00:13:58 +0200 Subject: [PATCH 27/28] =?UTF-8?q?=F0=9F=93=9A=20Update=20docs=20(#285)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .github/ISSUE_TEMPLATE/bug_report.yml | 63 ++++++++++++++++++++++++++ .github/ISSUE_TEMPLATE/config.yml | 4 ++ .github/ISSUE_TEMPLATE/enhancement.yml | 44 ++++++++++++++++++ 3 files changed, 111 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.yml create mode 100644 .github/ISSUE_TEMPLATE/config.yml create mode 100644 .github/ISSUE_TEMPLATE/enhancement.yml diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml 
b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 00000000..df7e2306 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,63 @@ +name: Report a problem 🐛 +description: Problem reports are for when something behaves incorrectly, or differently from how you'd expect. +labels: [bug] +body: +- type: textarea + id: describe + attributes: + label: Describe the bug + description: | + Provide a short description (one or two sentences) about the problem. What did you expect to happen, and what is actually happening? + + If possible, provide screenshots or error messages that you've encountered. + value: | + **context** + When I do ___. + + **expectation** + I expected ___ to occur. + + **bug** + But instead ___ happens + Here's an error message I ran into... + + **problem** + This is a problem for people doing ___ because ___. + + validations: + required: true + +- type: textarea + id: reproduce + attributes: + label: Reproduce the bug + description: | + Provide information that others may use to re-produce this behavior. + For example: + + - Step-by-step instructions that others can follow. + - Links to a website that demonstrates the bug. + - Information about certain conditions that the bug pops up. + + placeholder: | + 1. Go to '...' + 2. Click on '....' + 3. Scroll down to '....' + 4. See error + validations: + required: true + +- type: textarea + id: environment + attributes: + label: List your environment + description: | + List the environment needed to reproduce the error. + Here are a few ideas: + + - Version of markdown-it-py + - Versions of mdit-py-plugins + - The version of Python you're using. 
+ - Your operating system + validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 00000000..46e5f734 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,4 @@ +contact_links: + - name: Questions or general discussion ❓🗣️ + url: https://github.com/executablebooks/markdown-it-py/discussions + about: Use Discussions for general conversations that aren't meant for actionable Issues. diff --git a/.github/ISSUE_TEMPLATE/enhancement.yml b/.github/ISSUE_TEMPLATE/enhancement.yml new file mode 100644 index 00000000..68eac5e6 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/enhancement.yml @@ -0,0 +1,44 @@ +name: Request an enhancement 💡 +description: Suggest an idea for this project +labels: [enhancement] +body: + +- type: textarea + id: context + attributes: + label: Context + description: | + - Provide background to help others understand this issue. + - Describe the problem or need you'd like to address. + validations: + required: true + + +- type: textarea + id: proposal + attributes: + label: Proposal + description: | + - A simple and clear description of what you're proposing. + - Ideas or constraints for how to implement this proposal + - Important considerations to think about or discuss + validations: + required: false + + +- type: textarea + id: tasks + attributes: + label: Tasks and updates + description: | + Use this area to track ongoing work and to-do items. + The more specific the better. + + _If you can't think of anything then just leave this blank and we can fill it in later! This can be filled in as we understand more about an issue._ + + placeholder: | + - [ ] Discuss and decide on what to do... + - [ ] Implement partial feature A...
+ + validations: + required: false From bee6d1953be75717a3f2f6a917da6f464bed421d Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Sat, 3 Jun 2023 08:31:15 +0200 Subject: [PATCH 28/28] =?UTF-8?q?=F0=9F=9A=80=20RELEASE:=20v3.0.0=20(#286)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 78 +++++++++++++++++++++++++++++++++++++++++ README.md | 1 + docs/index.md | 1 + markdown_it/__init__.py | 2 +- 4 files changed, 81 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fa8dc6b7..1d02f42d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,83 @@ # Change Log +## 3.0.0 - 2023-06-03 + +⚠️ This release contains some minor breaking changes in the internal API and improvements to the parsing strictness. + +**Full Changelog**: + +### ⬆️ UPGRADE: Drop support for Python 3.7 + +Also add testing for Python 3.11 + +### ⬆️ UPGRADE: Update from upstream markdown-it `12.2.0` to `13.0.0` + +A key change is the addition of a new `Token` type, `text_special`, which is used to represent HTML entities and backslash escaped characters. +This ensures that (core) typographic transformation rules are not incorrectly applied to these texts. +The final core rule is now the new `text_join` rule, which joins adjacent `text`/`text_special` tokens, +and so no `text_special` tokens should be present in the final token stream. +Any custom typographic rules should be inserted before `text_join`. + +A new `linkify` rule has also been added to the inline chain, which will linkify full URLs (e.g. `https://example.com`), +and fixes collision of emphasis and linkifier (so `http://example.org/foo._bar_-_baz` is now a single link, not emphasized). +Emails and fuzzy links are not affected by this. 
+ +* ♻️ Refactor backslash escape logic, add `text_special` [#276](https://github.com/executablebooks/markdown-it-py/pull/276) +* ♻️ Parse entities to `text_special` token [#280](https://github.com/executablebooks/markdown-it-py/pull/280) +* ♻️ Refactor: Add linkifier rule to inline chain for full links [#279](https://github.com/executablebooks/markdown-it-py/pull/279) +* ‼️ Remove `(p)` => `§` replacement in typographer [#281](https://github.com/executablebooks/markdown-it-py/pull/281) +* ‼️ Remove unused `silent` arg in `ParserBlock.tokenize` [#284](https://github.com/executablebooks/markdown-it-py/pull/284) +* 🐛 FIX: numeric character reference passing [#272](https://github.com/executablebooks/markdown-it-py/pull/272) +* 🐛 Fix: tab preventing paragraph continuation in lists [#274](https://github.com/executablebooks/markdown-it-py/pull/274) +* 👌 Improve nested emphasis parsing [#273](https://github.com/executablebooks/markdown-it-py/pull/273) +* 👌 fix possible ReDOS in newline rule [#275](https://github.com/executablebooks/markdown-it-py/pull/275) +* 👌 Improve performance of `skipSpaces`/`skipChars` [#271](https://github.com/executablebooks/markdown-it-py/pull/271) +* 👌 Show text of `text_special` in `tree.pretty` [#282](https://github.com/executablebooks/markdown-it-py/pull/282) + +### ♻️ REFACTOR: Replace most character code use with strings + +The use of `StateBase.srcCharCode` is deprecated (with backward-compatibility), and all core uses are replaced by `StateBase.src`. + +Conversion of source string characters to an integer representing the Unicode character is prevalent in the upstream JavaScript implementation, to improve performance. +However, it is unnecessary in Python and leads to harder to read code and performance deprecations (during the conversion in the `StateBase` initialisation). + +See [#270](https://github.com/executablebooks/markdown-it-py/pull/270), thanks to [@hukkinj1](https://github.com/hukkinj1). 
+ +### ♻️ Centralise indented code block tests + +For CommonMark, the presence of indented code blocks prevent any other block element from having an indent of greater than 4 spaces. +Certain Markdown flavors and derivatives, such as mdx and djot, disable these code blocks though, since it is more common to use code fences and/or arbitrary indenting is desirable. +Previously, disabling code blocks did not remove the indent limitation, since most block elements had the 3 space limitation hard-coded. +This change centralised the logic of applying this limitation (in `StateBlock.is_code_block`), and only applies it when indented code blocks are enabled. + +This allows for e.g. + +```md +
    +
    + + I can indent as much as I want here. + +
    +
    +``` + +See [#260](https://github.com/executablebooks/markdown-it-py/pull/260) + +### 🔧 Maintenance changes + +Strict type annotation checking has been applied to the whole code base, +[ruff](https://github.com/charliermarsh/ruff) is now used for linting, +and fuzzing tests have been added to the CI, to integrate with Google [OSS-Fuzz](https://github.com/google/oss-fuzz/tree/master/projects/markdown-it-py) testing, thanks to [@DavidKorczynski](https://github.com/DavidKorczynski). + +* 🔧 MAINTAIN: Make type checking strict [#](https://github.com/executablebooks/markdown-it-py/pull/267) +* 🔧 Add typing of rule functions [#283](https://github.com/executablebooks/markdown-it-py/pull/283) +* 🔧 Move linting from flake8 to ruff [#268](https://github.com/executablebooks/markdown-it-py/pull/268) +* 🧪 CI: Add fuzzing workflow for PRs [#262](https://github.com/executablebooks/markdown-it-py/pull/262) +* 🔧 Add tox env for fuzz testcase run [#263](https://github.com/executablebooks/markdown-it-py/pull/263) +* 🧪 Add OSS-Fuzz set up by @DavidKorczynski in [#255](https://github.com/executablebooks/markdown-it-py/pull/255) +* 🧪 Fix fuzzing test failures [#254](https://github.com/executablebooks/markdown-it-py/pull/254) + ## 2.2.0 - 2023-02-22 ### What's Changed diff --git a/README.md b/README.md index 789588fe..2ff747ef 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,7 @@ - Pluggable: Adds syntax extensions to extend the parser (see the [plugin list][md-plugins]). - High speed (see our [benchmarking tests][md-performance]) - [Safe by default][md-security] +- Member of [Google's Assured Open Source Software](https://cloud.google.com/assured-open-source-software/docs/supported-packages) This is a Python port of [markdown-it], and some of its associated plugins. For more details see: . 
diff --git a/docs/index.md b/docs/index.md index 64fd344d..bff3ac31 100644 --- a/docs/index.md +++ b/docs/index.md @@ -7,6 +7,7 @@ - {fa}`check,text-success mr-1` Pluggable: Adds syntax extensions to extend the parser (see the [plugin list](md/plugins)) - {fa}`check,text-success mr-1` High speed (see our [benchmarking tests](md/performance)) - {fa}`check,text-success mr-1` [Safe by default](md/security) +- {fa}`check,text-success mr-1` Member of [Google's Assured Open Source Software](https://cloud.google.com/assured-open-source-software/docs/supported-packages) For a good introduction to [markdown-it] see the __[Live demo](https://markdown-it.github.io)__. This is a Python port of the well used [markdown-it], and some of its associated plugins. diff --git a/markdown_it/__init__.py b/markdown_it/__init__.py index 882a0c3e..6606868a 100644 --- a/markdown_it/__init__.py +++ b/markdown_it/__init__.py @@ -1,5 +1,5 @@ """A Python port of Markdown-It""" __all__ = ("MarkdownIt",) -__version__ = "2.2.0" +__version__ = "3.0.0" from .main import MarkdownIt