From c68d342d60bb15a11e5c44d615e4f253dbbbb25c Mon Sep 17 00:00:00 2001 From: Michael <67898553+mib112@users.noreply.github.com> Date: Tue, 12 Apr 2022 22:39:18 +0200 Subject: [PATCH 01/97] =?UTF-8?q?=F0=9F=A7=AA=20TEST:=20Space=20in=20link?= =?UTF-8?q?=20destination=20does=20not=20raise=20`IndexError`=20(#206)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_port/fixtures/commonmark_extras.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/test_port/fixtures/commonmark_extras.md b/tests/test_port/fixtures/commonmark_extras.md index ca14db31..c2df5084 100644 --- a/tests/test_port/fixtures/commonmark_extras.md +++ b/tests/test_port/fixtures/commonmark_extras.md @@ -663,3 +663,13 @@ Issue #772. Header rule should not interfere with html tags. == . + +Issue #205. Space in link destination generates IndexError +. +[Contact](http:// mail.com) + +[Contact](mailto: mail@mail.com) +. +
<p>[Contact](http:// mail.com)</p>
+<p>[Contact](mailto: mail@mail.com)</p>
+. From 7748e1308c4d8b5a3ad8c147e424faa10eed6320 Mon Sep 17 00:00:00 2001 From: Kian-Meng Ang Date: Wed, 13 Apr 2022 04:41:46 +0800 Subject: [PATCH 02/97] =?UTF-8?q?=F0=9F=93=9A=20DOCS:=20Fix=20typos=20(#20?= =?UTF-8?q?3)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/contributing.md | 2 +- markdown_it/common/html_blocks.py | 2 +- markdown_it/port.yaml | 2 +- markdown_it/presets/default.py | 2 +- markdown_it/presets/zero.py | 2 +- markdown_it/ruler.py | 2 +- markdown_it/rules_inline/strikethrough.py | 2 +- markdown_it/rules_inline/text.py | 2 +- tests/test_cmark_spec/spec.md | 2 +- tests/test_cmark_spec/test_spec/test_file.html | 2 +- tests/test_port/fixtures/fatal.md | 6 +++--- 11 files changed, 13 insertions(+), 13 deletions(-) diff --git a/docs/contributing.md b/docs/contributing.md index 9469d0ee..0495ad1e 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -29,7 +29,7 @@ It can be setup by: Editors like VS Code also have automatic code reformat utilities, which can adhere to this standard. All functions and class methods should be annotated with types and include a docstring. -The prefered docstring format is outlined in `markdown-it-py/docstring.fmt.mustache` and can be used automatically with the [autodocstring](https://marketplace.visualstudio.com/items?itemName=njpwerner.autodocstring) VS Code extension. +The preferred docstring format is outlined in `markdown-it-py/docstring.fmt.mustache` and can be used automatically with the [autodocstring](https://marketplace.visualstudio.com/items?itemName=njpwerner.autodocstring) VS Code extension. ## Testing diff --git a/markdown_it/common/html_blocks.py b/markdown_it/common/html_blocks.py index 4246f788..8b199af3 100644 --- a/markdown_it/common/html_blocks.py +++ b/markdown_it/common/html_blocks.py @@ -1,4 +1,4 @@ -"""List of valid html blocks names, accorting to commonmark spec +"""List of valid html blocks names, according to commonmark spec http://jgm.github.io/CommonMark/spec.html#html-blocks """ diff --git a/markdown_it/port.yaml b/markdown_it/port.yaml index 6c3b06c1..a6718fda 100644 --- a/markdown_it/port.yaml +++ b/markdown_it/port.yaml @@ -45,5 +45,5 @@ `MarkdownIt.add_render_rule(name, function, fmt="html")`, rather than `MarkdownIt.renderer.rules[name] = function` and renderers should declare a class property `__output__ = "html"`. - This allows for extensability to more than just HTML renderers + This allows for extensibility to more than just HTML renderers - inline tokens in tables are assigned a map (this is helpful for propagation to children) diff --git a/markdown_it/presets/default.py b/markdown_it/presets/default.py index 3f347913..59f4855e 100644 --- a/markdown_it/presets/default.py +++ b/markdown_it/presets/default.py @@ -23,7 +23,7 @@ def make(): "breaks": False, # Convert '\n' in paragraphs into
"langPrefix": "language-", # CSS language prefix for fenced blocks # Highlighter function. Should return escaped HTML, - # or '' if the source string is not changed and should be escaped externaly. + # or '' if the source string is not changed and should be escaped externally. # If result starts with "langPrefix": "language-", # CSS language prefix for fenced blocks # Highlighter function. Should return escaped HTML, - # or '' if the source string is not changed and should be escaped externaly. + # or '' if the source string is not changed and should be escaped externally. # If result starts with `~` + `~~` + `~~`, leaving one marker at the # start of the sequence. # diff --git a/markdown_it/rules_inline/text.py b/markdown_it/rules_inline/text.py index f36f069a..0773a9d4 100644 --- a/markdown_it/rules_inline/text.py +++ b/markdown_it/rules_inline/text.py @@ -5,7 +5,7 @@ # Rule to skip pure text -# '{}$%@~+=:' reserved for extentions +# '{}$%@~+=:' reserved for extensions # !, ", #, $, %, &, ', (, ), *, +, ,, -, ., /, :, ;, <, =, >, ?, @, [, \, ], ^, _, `, {, |, }, or ~ diff --git a/tests/test_cmark_spec/spec.md b/tests/test_cmark_spec/spec.md index e6f31375..2d79f7b7 100644 --- a/tests/test_cmark_spec/spec.md +++ b/tests/test_cmark_spec/spec.md @@ -4533,7 +4533,7 @@ inside the code block: Note that rules #1 and #2 only apply to two cases: (a) cases in which the lines to be included in a list item begin with a -characer other than a space or tab, and (b) cases in which +character other than a space or tab, and (b) cases in which they begin with an indented code block. In a case like the following, where the first block begins with three spaces of indentation, the rules do not allow us to form a list item by diff --git a/tests/test_cmark_spec/test_spec/test_file.html b/tests/test_cmark_spec/test_spec/test_file.html index 9f73ce6c..1c2dc3cb 100644 --- a/tests/test_cmark_spec/test_spec/test_file.html +++ b/tests/test_cmark_spec/test_spec/test_file.html @@ -3418,7 +3418,7 @@

List items

Note that rules #1 and #2 only apply to two cases: (a) cases in which the lines to be included in a list item begin with a -characer other than a space or tab, and (b) cases in which +character other than a space or tab, and (b) cases in which they begin with an indented code block. In a case like the following, where the first block begins with three spaces of indentation, the rules do not allow us to form a list item by diff --git a/tests/test_port/fixtures/fatal.md b/tests/test_port/fixtures/fatal.md index dfeeb2e7..7b2afcfc 100644 --- a/tests/test_port/fixtures/fatal.md +++ b/tests/test_port/fixtures/fatal.md @@ -1,4 +1,4 @@ -Should not throw exception on invalid chars in URL (`*` not allowed in path) [mailformed URI] +Should not throw exception on invalid chars in URL (`*` not allowed in path) [malformed URI] . [foo](<%test>) . @@ -6,7 +6,7 @@ Should not throw exception on invalid chars in URL (`*` not allowed in path) [ma . -Should not throw exception on broken utf-8 sequence in URL [mailformed URI] +Should not throw exception on broken utf-8 sequence in URL [malformed URI] . [foo](%C3) . @@ -14,7 +14,7 @@ Should not throw exception on broken utf-8 sequence in URL [mailformed URI] . -Should not throw exception on broken utf-16 surrogates sequence in URL [mailformed URI] +Should not throw exception on broken utf-16 surrogates sequence in URL [malformed URI] . [foo](�) . From 4e5be6639c0fd2a3fa8f4e2619e350a88ffb1e2f Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Thu, 14 Apr 2022 23:31:04 +0200 Subject: [PATCH 03/97] =?UTF-8?q?=F0=9F=94=A7=20MAINTAIN:=20Move=20from=20?= =?UTF-8?q?setuptools=20to=20flit=20(#208)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/tests.yml | 22 ++++----- .pre-commit-config.yaml | 22 ++------- MANIFEST.in | 24 --------- docs/contributing.md | 1 - docstring.fmt.mustache | 20 -------- markdown_it/token.py | 2 +- markdown_it/tree.py | 2 +- pyproject.toml | 98 ++++++++++++++++++++++++++++++++++++- setup.cfg | 93 ----------------------------------- setup.py | 6 --- tox.ini | 4 ++ 11 files changed, 117 insertions(+), 177 deletions(-) delete mode 100644 MANIFEST.in delete mode 100644 docstring.fmt.mustache delete mode 100644 setup.cfg delete mode 100644 setup.py diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index e592a769..e46c0847 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -7,7 +7,7 @@ on: push: branches: [master] tags: - - 'v*' + - "v[0-9]+.[0-9]+.[0-9]+*" pull_request: schedule: - cron: '0 0 * * 0' # every week @@ -113,19 +113,19 @@ jobs: steps: - name: Checkout source uses: actions/checkout@v2 - - name: Set up Python 3.7 + - name: Set up Python 3.8 uses: actions/setup-python@v2 with: - python-version: '3.7' - - name: Build package + python-version: '3.8' + - name: install flit run: | - pip install build - python -m build - - name: Publish - uses: pypa/gh-action-pypi-publish@v1.1.0 - with: - user: __token__ - password: ${{ secrets.PYPI_KEY }} + pip install flit~=3.4 + - name: Build and publish + run: | + flit publish + env: + FLIT_USERNAME: __token__ + FLIT_PASSWORD: ${{ secrets.PYPI_KEY }} allgood: runs-on: ubuntu-latest diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index da6ae0ac..50921671 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,40 +16,26 @@ exclude: > repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.4.0 + rev: v4.2.0 hooks: - id: check-json - id: check-yaml - 
id: end-of-file-fixer - id: trailing-whitespace - - repo: https://github.com/mgedmin/check-manifest - rev: "0.46" - hooks: - - id: check-manifest - args: [--no-build-isolation] - additional_dependencies: [setuptools>=46.4.0] - - # this is not used for now, - # since it converts markdown-it-py to markdown_it_py and removes comments - # - repo: https://github.com/asottile/setup-cfg-fmt - # rev: v1.17.0 - # hooks: - # - id: setup-cfg-fmt - - repo: https://github.com/psf/black - rev: 20.8b1 + rev: 22.3.0 hooks: - id: black - repo: https://gitlab.com/pycqa/flake8 - rev: 3.9.1 + rev: 3.9.2 hooks: - id: flake8 additional_dependencies: [flake8-bugbear==21.3.1] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.812 + rev: v0.942 hooks: - id: mypy additional_dependencies: [attrs] diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 1302ef01..00000000 --- a/MANIFEST.in +++ /dev/null @@ -1,24 +0,0 @@ -exclude docs -recursive-exclude docs * -exclude tests -recursive-exclude tests * -exclude benchmarking -recursive-exclude benchmarking * - -exclude .pre-commit-config.yaml -exclude .readthedocs.yml -exclude tox.ini -exclude docstring.fmt.mustache -exclude .flake8 -exclude .circleci -exclude .circleci/config.yml -exclude codecov.yml -exclude .mypy.ini -exclude profiler.py - -include LICENSE -include LICENSE.markdown-it -include CHANGELOG.md - -include markdown_it/py.typed -include markdown_it/port.yaml diff --git a/docs/contributing.md b/docs/contributing.md index 0495ad1e..6c43e0e0 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -29,7 +29,6 @@ It can be setup by: Editors like VS Code also have automatic code reformat utilities, which can adhere to this standard. All functions and class methods should be annotated with types and include a docstring. -The preferred docstring format is outlined in `markdown-it-py/docstring.fmt.mustache` and can be used automatically with the [autodocstring](https://marketplace.visualstudio.com/items?itemName=njpwerner.autodocstring) VS Code extension. ## Testing diff --git a/docstring.fmt.mustache b/docstring.fmt.mustache deleted file mode 100644 index 717a4572..00000000 --- a/docstring.fmt.mustache +++ /dev/null @@ -1,20 +0,0 @@ -{{! Sphinx Docstring Template }} -{{summaryPlaceholder}} - -{{extendedSummaryPlaceholder}} - -{{#args}} -:param {{var}}: {{descriptionPlaceholder}} -{{/args}} -{{#kwargs}} -:param {{var}}: {{descriptionPlaceholder}} -{{/kwargs}} -{{#exceptions}} -:raises {{type}}: {{descriptionPlaceholder}} -{{/exceptions}} -{{#returns}} -:return: {{descriptionPlaceholder}} -{{/returns}} -{{#yields}} -:yield: {{descriptionPlaceholder}} -{{/yields}} diff --git a/markdown_it/token.py b/markdown_it/token.py index 8abf72c3..83cfb57b 100644 --- a/markdown_it/token.py +++ b/markdown_it/token.py @@ -173,7 +173,7 @@ def __getattr__(self, name): return getattr(self.opening, name) def attrGet(self, name: str) -> None | str | int | float: - """ Get the value of attribute `name`, or null if it does not exist.""" + """Get the value of attribute `name`, or null if it does not exist.""" return self.opening.attrGet(name) diff --git a/markdown_it/tree.py b/markdown_it/tree.py index a3c75487..b17103e5 100644 --- a/markdown_it/tree.py +++ b/markdown_it/tree.py @@ -276,7 +276,7 @@ def _attribute_token(self) -> Token: @property def tag(self) -> str: - """html tag name, e.g. \"p\"""" + """html tag name, e.g. 
\"p\" """ return self._attribute_token().tag @property diff --git a/pyproject.toml b/pyproject.toml index f7e22d4a..74de047b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,100 @@ [build-system] -requires = ["setuptools>=46.4.0", "wheel"] -build-backend = "setuptools.build_meta" +requires = ["flit_core >=3.4,<4"] +build-backend = "flit_core.buildapi" + +[project] +name = "markdown-it-py" +dynamic = ["version"] +description = "Python port of markdown-it. Markdown parsing, done right!" +readme = "README.md" +authors = [{name = "Chris Sewell", email = "chrisj_sewell@hotmail.com"}] +license = {file = "LICENSE"} +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", + "Topic :: Software Development :: Libraries :: Python Modules", + "Topic :: Text Processing :: Markup", +] +keywords = ["markdown", "lexer", "parser", "commonmark", "markdown-it"] +requires-python = ">=3.7" +dependencies = [ + "attrs>=19,<22", + "mdurl~=0.1", + "typing_extensions>=3.7.4;python_version<'3.8'", +] + +[project.urls] +Homepage = "https://github.com/executablebooks/markdown-it-py" +Documentation = "https://markdown-it-py.readthedocs.io/" + +[project.optional-dependencies] +code_style = ["pre-commit==2.6"] +compare = [ + "commonmark~=0.9.1", + "markdown~=3.3.6", + "mistletoe~=0.8.1", + "mistune~=2.0.2", + "panflute~=2.1.3", +] +linkify = ["linkify-it-py~=1.0"] +plugins = ["mdit-py-plugins"] +rtd = [ + "myst-nb==0.13.0a1", + "pyyaml", + "sphinx>=2,<4", + "sphinx-copybutton", + "sphinx-panels~=0.4.0", + "sphinx_book_theme", +] +testing = [ + "coverage", + "pytest", + "pytest-cov", + "pytest-regressions", +] +benchmarking = [ + "psutil", + "pytest", + "pytest-benchmark~=3.2", +] +profiling = ["gprof2dot"] + +[project.scripts] +markdown-it = "markdown_it.cli.parse:main" + +[tool.flit.module] +name = "markdown_it" + +[tool.flit.sdist] +exclude = [ + "docs/", + "tests/", + "benchmarking/" +] + +[tool.mypy] +show_error_codes = true +warn_unused_ignores = true +warn_redundant_casts = true +no_implicit_optional = true +strict_equality = true +implicit_reexport = false + +[[tool.mypy.overrides]] +module = ["tests.test_plugins.*", "markdown.*"] +ignore_errors = true + +[[tool.mypy.overrides]] +module = ["markdown.*"] +ignore_missing_imports = true [tool.pytest.ini_options] xfail_strict = true diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 652b36a0..00000000 --- a/setup.cfg +++ /dev/null @@ -1,93 +0,0 @@ -[metadata] -name = markdown-it-py -version = attr: markdown_it.__version__ -description = Python port of markdown-it. Markdown parsing, done right! 
-long_description = file: README.md -long_description_content_type = text/markdown -url = https://github.com/executablebooks/markdown-it-py -author = Chris Sewell -author_email = chrisj_sewell@hotmail.com -license = MIT -license_file = LICENSE -classifiers = - Development Status :: 5 - Production/Stable - Intended Audience :: Developers - License :: OSI Approved :: MIT License - Programming Language :: Python :: 3 - Programming Language :: Python :: 3.7 - Programming Language :: Python :: 3.8 - Programming Language :: Python :: 3.9 - Programming Language :: Python :: 3.10 - Programming Language :: Python :: Implementation :: CPython - Programming Language :: Python :: Implementation :: PyPy - Topic :: Software Development :: Libraries :: Python Modules - Topic :: Text Processing :: Markup -keywords = markdown lexer parser development -project_urls = - Documentation=https://markdown-it-py.readthedocs.io - -[options] -packages = find: -install_requires = - attrs>=19,<22 - mdurl~=0.1 - typing_extensions>=3.7.4;python_version<'3.8' -python_requires = >=3.7 -include_package_data = True -zip_safe = False - -[options.entry_points] -console_scripts = - markdown-it = markdown_it.cli.parse:main - -[options.extras_require] -code_style = - pre-commit==2.6 -compare = - commonmark~=0.9.1 - markdown~=3.3.6 - mistletoe~=0.8.1 - mistune~=2.0.2 - panflute~=2.1.3 -linkify = - linkify-it-py~=1.0 -plugins = - mdit-py-plugins -rtd = - myst-nb==0.13.0a1 - pyyaml - sphinx>=2,<4 - sphinx-copybutton - sphinx-panels~=0.4.0 - sphinx_book_theme -testing = - coverage - pytest - pytest-cov - pytest-regressions -benchmarking = - psutil - pytest - pytest-benchmark~=3.2 -profiling = - gprof2dot - -[options.packages.find] -exclude = - test* - benchmarking - -[mypy] -show_error_codes = True -warn_unused_ignores = True -warn_redundant_casts = True -no_implicit_optional = True -strict_equality = True -implicit_reexport = False - -[mypy-tests.test_plugins.*] -ignore_errors = True - -[flake8] -max-line-length = 100 -extend-ignore = E203 diff --git a/setup.py b/setup.py deleted file mode 100644 index 36141267..00000000 --- a/setup.py +++ /dev/null @@ -1,6 +0,0 @@ -# This file is needed for editable installs (`pip install -e .`). 
-# Can be removed once the following is resolved -# https://github.com/pypa/packaging-problems/issues/256 -from setuptools import setup - -setup() diff --git a/tox.ini b/tox.ini index e771e3e9..44860ef6 100644 --- a/tox.ini +++ b/tox.ini @@ -56,3 +56,7 @@ commands = gprof2dot -f pstats -o "{toxworkdir}/prof/output.dot" "{toxworkdir}/prof/output.pstats" dot -Tsvg -o "{toxworkdir}/prof/output.svg" "{toxworkdir}/prof/output.dot" python -c 'import pathlib; print("profiler svg output under file://\{0\}".format(pathlib.Path(r"{toxworkdir}") / "prof" / "output.svg"))' + +[flake8] +max-line-length = 100 +extend-ignore = E203 From cb2eee1338e2da0eca394b323b7811c2147619e5 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 15 Apr 2022 00:35:45 +0200 Subject: [PATCH 04/97] =?UTF-8?q?=F0=9F=93=9A=20DOCS:=20Fix=20build=20(#20?= =?UTF-8?q?9)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By temporarily removing myst-nb --- docs/_static/custom.css | 5 +++++ docs/architecture.md | 4 ++-- docs/conf.py | 30 +++++++++++++++++++++++------- pyproject.toml | 6 +++--- tox.ini | 5 ++++- 5 files changed, 37 insertions(+), 13 deletions(-) create mode 100644 docs/_static/custom.css diff --git a/docs/_static/custom.css b/docs/_static/custom.css new file mode 100644 index 00000000..9a16010b --- /dev/null +++ b/docs/_static/custom.css @@ -0,0 +1,5 @@ +.code-cell > .highlight > pre { + border-left-color: green; + border-left-width: medium; + border-left-style: solid; +} diff --git a/docs/architecture.md b/docs/architecture.md index bfc49c27..bebcf9dc 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -2,6 +2,7 @@ # markdown-it design principles +(md/data-flow)= ## Data flow Input data is parsed via nested chains of rules. There are 3 nested chains - @@ -157,10 +158,9 @@ renderer override, but can be more simple. You also can write your own renderer to generate other formats than HTML, such as JSON/XML... You can even use it to generate AST. - ## Summary -This was mentioned in [Data flow](#data-flow), but let's repeat sequence again: +This was mentioned in [Data flow](md/data-flow), but let's repeat sequence again: 1. Blocks are parsed, and top level of token stream filled with block tokens. 2. Content on inline containers is parsed, filling `.children` properties. diff --git a/docs/conf.py b/docs/conf.py index 1040838e..56ded533 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -33,25 +33,25 @@ "sphinx.ext.autodoc", "sphinx.ext.viewcode", "sphinx.ext.intersphinx", - "myst_nb", + "myst_parser", "sphinx_copybutton", - "sphinx_panels", + "sphinx_design", ] -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. 
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] +nitpicky = True nitpick_ignore = [ ("py:class", "Match"), + ("py:class", "Path"), ("py:class", "x in the interval [0, 1)."), ("py:class", "markdown_it.helpers.parse_link_destination._Result"), ("py:class", "markdown_it.helpers.parse_link_title._Result"), ("py:class", "MarkdownIt"), + ("py:class", "RuleFunc"), ("py:class", "_NodeType"), ("py:class", "typing_extensions.Protocol"), ] @@ -70,7 +70,8 @@ "repository_branch": "master", "path_to_docs": "docs", } -panels_add_boostrap_css = False +html_static_path = ["_static"] +html_css_files = ["custom.css"] # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, @@ -100,7 +101,7 @@ def run_apidoc(app): this_folder = os.path.abspath(os.path.dirname(os.path.realpath(__file__))) api_folder = os.path.join(this_folder, "api") module_path = os.path.normpath(os.path.join(this_folder, "../")) - ignore_paths = ["../setup.py", "../conftest.py", "../tests", "../benchmarking"] + ignore_paths = ["../profiler.py", "../conftest.py", "../tests", "../benchmarking"] ignore_paths = [ os.path.normpath(os.path.join(this_folder, p)) for p in ignore_paths ] @@ -120,6 +121,7 @@ def run_apidoc(app): argv = ["-M", "--separate", "-o", api_folder, module_path] + ignore_paths + apidoc.OPTIONS.append("ignore-module-all") apidoc.main(argv) # we don't use this @@ -131,3 +133,17 @@ def setup(app): """Add functions to the Sphinx setup.""" if os.environ.get("SKIP_APIDOC", None) is None: app.connect("builder-inited", run_apidoc) + + from sphinx.directives.code import CodeBlock + + class CodeCell(CodeBlock): + """Custom code block directive.""" + + def run(self): + """Run the directive.""" + self.options["class"] = ["code-cell"] + return super().run() + + # note, these could be run by myst-nb, + # but currently this causes a circular dependency issue + app.add_directive("code-cell", CodeCell) diff --git a/pyproject.toml b/pyproject.toml index 74de047b..55106e49 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,11 +47,11 @@ compare = [ linkify = ["linkify-it-py~=1.0"] plugins = ["mdit-py-plugins"] rtd = [ - "myst-nb==0.13.0a1", + "myst-parser", "pyyaml", - "sphinx>=2,<4", + "sphinx", "sphinx-copybutton", - "sphinx-panels~=0.4.0", + "sphinx-design", "sphinx_book_theme", ] testing = [ diff --git a/tox.ini b/tox.ini index 44860ef6..f4e117e2 100644 --- a/tox.ini +++ b/tox.ini @@ -37,12 +37,15 @@ commands = pytest benchmarking/bench_packages.py {posargs} [testenv:docs-{update,clean}] extras = linkify,plugins,rtd -whitelist_externals = rm +whitelist_externals = + echo + rm setenv = update: SKIP_APIDOC = true commands = clean: rm -rf docs/_build sphinx-build -nW --keep-going -b {posargs:html} docs/ docs/_build/{posargs:html} +commands_post = echo "open file://{toxinidir}/docs/_build/{posargs:html}/index.html" [testenv:profile] description = run profiler (use e.g. 
`firefox .tox/prof/output.svg` to open) From 063268b3fad808e63787cedf1917214405dede31 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 15 Apr 2022 00:46:30 +0200 Subject: [PATCH 05/97] =?UTF-8?q?=F0=9F=94=A7=20MAINTAIN:=20Add=20isort=20?= =?UTF-8?q?hook=20(#210)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 5 +++++ benchmarking/bench_core.py | 1 + benchmarking/bench_packages.py | 3 ++- docs/conf.py | 1 + markdown_it/cli/parse.py | 1 - markdown_it/common/normalize_url.py | 3 +-- markdown_it/helpers/__init__.py | 2 +- markdown_it/helpers/parse_link_destination.py | 2 +- markdown_it/helpers/parse_link_title.py | 2 +- markdown_it/main.py | 8 ++++---- markdown_it/parser_block.py | 4 ++-- markdown_it/parser_core.py | 5 ++--- markdown_it/parser_inline.py | 6 +++--- markdown_it/renderer.py | 2 +- markdown_it/ruler.py | 1 + markdown_it/rules_block/__init__.py | 12 ++++++------ markdown_it/rules_block/blockquote.py | 2 +- markdown_it/rules_block/code.py | 1 + markdown_it/rules_block/heading.py | 2 +- markdown_it/rules_block/hr.py | 1 - markdown_it/rules_block/html_block.py | 2 +- markdown_it/rules_block/list.py | 2 +- markdown_it/rules_block/reference.py | 3 +-- markdown_it/rules_block/state_block.py | 4 ++-- markdown_it/rules_block/table.py | 3 +-- markdown_it/rules_core/__init__.py | 6 +++--- markdown_it/rules_core/linkify.py | 3 +-- markdown_it/rules_core/normalize.py | 1 - markdown_it/rules_core/replacements.py | 2 +- markdown_it/rules_core/smartquotes.py | 6 ++---- markdown_it/rules_core/state_core.py | 2 +- markdown_it/rules_inline/__init__.py | 19 +++++++++---------- markdown_it/rules_inline/autolink.py | 1 + markdown_it/rules_inline/emphasis.py | 2 +- markdown_it/rules_inline/entity.py | 2 +- markdown_it/rules_inline/escape.py | 3 +-- markdown_it/rules_inline/html_inline.py | 2 +- markdown_it/rules_inline/image.py | 4 ++-- markdown_it/rules_inline/link.py | 2 +- markdown_it/rules_inline/newline.py | 2 +- markdown_it/rules_inline/state_inline.py | 4 ++-- markdown_it/rules_inline/strikethrough.py | 2 +- markdown_it/rules_inline/text.py | 3 ++- markdown_it/tree.py | 7 +------ pyproject.toml | 4 ++++ tests/test_api/test_token.py | 2 +- tests/test_port/test_misc.py | 3 +-- 47 files changed, 79 insertions(+), 81 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 50921671..b9b3f0c0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,6 +23,11 @@ repos: - id: end-of-file-fixer - id: trailing-whitespace + - repo: https://github.com/pycqa/isort + rev: 5.10.1 + hooks: + - id: isort + - repo: https://github.com/psf/black rev: 22.3.0 hooks: diff --git a/benchmarking/bench_core.py b/benchmarking/bench_core.py index d8808d59..6834989f 100644 --- a/benchmarking/bench_core.py +++ b/benchmarking/bench_core.py @@ -1,4 +1,5 @@ from pathlib import Path + import pytest import markdown_it diff --git a/benchmarking/bench_packages.py b/benchmarking/bench_packages.py index afa39ead..1158750e 100644 --- a/benchmarking/bench_packages.py +++ b/benchmarking/bench_packages.py @@ -1,7 +1,8 @@ from pathlib import Path -import pytest from shutil import which +import pytest + @pytest.fixture def spec_text(): diff --git a/docs/conf.py b/docs/conf.py index 56ded533..786eff04 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -92,6 +92,7 @@ def run_apidoc(app): """ import os import shutil + import sphinx from sphinx.ext import apidoc diff --git a/markdown_it/cli/parse.py b/markdown_it/cli/parse.py 
index e159aad2..2d74f55a 100644 --- a/markdown_it/cli/parse.py +++ b/markdown_it/cli/parse.py @@ -13,7 +13,6 @@ from markdown_it import __version__ from markdown_it.main import MarkdownIt - version_str = "markdown-it-py [version {}]".format(__version__) diff --git a/markdown_it/common/normalize_url.py b/markdown_it/common/normalize_url.py index 4ecf2ef4..afec9284 100644 --- a/markdown_it/common/normalize_url.py +++ b/markdown_it/common/normalize_url.py @@ -2,13 +2,12 @@ from collections.abc import Callable import re -from urllib.parse import urlparse, urlunparse, quote, unquote # noqa: F401 +from urllib.parse import quote, unquote, urlparse, urlunparse # noqa: F401 import mdurl from .. import _punycode - RECODE_HOSTNAME_FOR = ("http:", "https:", "mailto:") diff --git a/markdown_it/helpers/__init__.py b/markdown_it/helpers/__init__.py index f76bd444..3dbbdd1d 100644 --- a/markdown_it/helpers/__init__.py +++ b/markdown_it/helpers/__init__.py @@ -1,6 +1,6 @@ """Functions for parsing Links """ __all__ = ("parseLinkLabel", "parseLinkDestination", "parseLinkTitle") -from .parse_link_label import parseLinkLabel from .parse_link_destination import parseLinkDestination +from .parse_link_label import parseLinkLabel from .parse_link_title import parseLinkTitle diff --git a/markdown_it/helpers/parse_link_destination.py b/markdown_it/helpers/parse_link_destination.py index 74dbec08..58b76f3c 100644 --- a/markdown_it/helpers/parse_link_destination.py +++ b/markdown_it/helpers/parse_link_destination.py @@ -2,7 +2,7 @@ Parse link destination """ -from ..common.utils import unescapeAll, charCodeAt +from ..common.utils import charCodeAt, unescapeAll class _Result: diff --git a/markdown_it/helpers/parse_link_title.py b/markdown_it/helpers/parse_link_title.py index 0cb1365b..842c83bc 100644 --- a/markdown_it/helpers/parse_link_title.py +++ b/markdown_it/helpers/parse_link_title.py @@ -1,6 +1,6 @@ """Parse link title """ -from ..common.utils import unescapeAll, charCodeAt +from ..common.utils import charCodeAt, unescapeAll class _Result: diff --git a/markdown_it/main.py b/markdown_it/main.py index 508b5ce4..7faac5ad 100644 --- a/markdown_it/main.py +++ b/markdown_it/main.py @@ -1,17 +1,17 @@ from __future__ import annotations -from contextlib import contextmanager from collections.abc import Callable, Generator, Iterable, Mapping, MutableMapping +from contextlib import contextmanager from typing import Any from . import helpers, presets # noqa F401 from .common import normalize_url, utils # noqa F401 -from .token import Token -from .parser_core import ParserCore # noqa F401 from .parser_block import ParserBlock # noqa F401 +from .parser_core import ParserCore # noqa F401 from .parser_inline import ParserInline # noqa F401 -from .rules_core.state_core import StateCore from .renderer import RendererHTML, RendererProtocol +from .rules_core.state_core import StateCore +from .token import Token from .utils import OptionsDict try: diff --git a/markdown_it/parser_block.py b/markdown_it/parser_block.py index f5768058..f331ec54 100644 --- a/markdown_it/parser_block.py +++ b/markdown_it/parser_block.py @@ -3,10 +3,10 @@ import logging +from . import rules_block from .ruler import Ruler -from .token import Token from .rules_block.state_block import StateBlock -from . 
import rules_block +from .token import Token LOGGER = logging.getLogger(__name__) diff --git a/markdown_it/parser_core.py b/markdown_it/parser_core.py index f0c3ad22..32209b32 100644 --- a/markdown_it/parser_core.py +++ b/markdown_it/parser_core.py @@ -6,10 +6,9 @@ """ from __future__ import annotations -from .ruler import Ruler, RuleFunc +from .ruler import RuleFunc, Ruler +from .rules_core import block, inline, linkify, normalize, replace, smartquotes from .rules_core.state_core import StateCore -from .rules_core import normalize, block, inline, replace, smartquotes, linkify - _rules: list[tuple[str, RuleFunc]] = [ ("normalize", normalize), diff --git a/markdown_it/parser_inline.py b/markdown_it/parser_inline.py index 826665db..b61c990b 100644 --- a/markdown_it/parser_inline.py +++ b/markdown_it/parser_inline.py @@ -2,10 +2,10 @@ """ from __future__ import annotations -from .ruler import Ruler, RuleFunc -from .token import Token -from .rules_inline.state_inline import StateInline from . import rules_inline +from .ruler import RuleFunc, Ruler +from .rules_inline.state_inline import StateInline +from .token import Token # Parser rules _rules: list[tuple[str, RuleFunc]] = [ diff --git a/markdown_it/renderer.py b/markdown_it/renderer.py index 88ee36fe..b8bfe4da 100644 --- a/markdown_it/renderer.py +++ b/markdown_it/renderer.py @@ -11,7 +11,7 @@ class Renderer import inspect from typing import Any, ClassVar -from .common.utils import unescapeAll, escapeHtml +from .common.utils import escapeHtml, unescapeAll from .token import Token from .utils import OptionsDict diff --git a/markdown_it/ruler.py b/markdown_it/ruler.py index 2217bcd6..06576520 100644 --- a/markdown_it/ruler.py +++ b/markdown_it/ruler.py @@ -19,6 +19,7 @@ class Ruler from collections.abc import Callable, Iterable, MutableMapping from typing import TYPE_CHECKING + import attr if TYPE_CHECKING: diff --git a/markdown_it/rules_block/__init__.py b/markdown_it/rules_block/__init__.py index c1660b89..bcf138df 100644 --- a/markdown_it/rules_block/__init__.py +++ b/markdown_it/rules_block/__init__.py @@ -13,15 +13,15 @@ "table", ) -from .state_block import StateBlock -from .paragraph import paragraph -from .heading import heading -from .lheading import lheading +from .blockquote import blockquote from .code import code from .fence import fence +from .heading import heading from .hr import hr +from .html_block import html_block +from .lheading import lheading from .list import list_block +from .paragraph import paragraph from .reference import reference -from .blockquote import blockquote -from .html_block import html_block +from .state_block import StateBlock from .table import table diff --git a/markdown_it/rules_block/blockquote.py b/markdown_it/rules_block/blockquote.py index 52616167..7f72b896 100644 --- a/markdown_it/rules_block/blockquote.py +++ b/markdown_it/rules_block/blockquote.py @@ -3,8 +3,8 @@ import logging -from .state_block import StateBlock from ..common.utils import isSpace +from .state_block import StateBlock LOGGER = logging.getLogger(__name__) diff --git a/markdown_it/rules_block/code.py b/markdown_it/rules_block/code.py index 6d9c87a3..c4fdba33 100644 --- a/markdown_it/rules_block/code.py +++ b/markdown_it/rules_block/code.py @@ -1,5 +1,6 @@ """Code block (4 spaces padded).""" import logging + from .state_block import StateBlock LOGGER = logging.getLogger(__name__) diff --git a/markdown_it/rules_block/heading.py b/markdown_it/rules_block/heading.py index 3ccc3059..8d4ef3e2 100644 --- 
a/markdown_it/rules_block/heading.py +++ b/markdown_it/rules_block/heading.py @@ -3,8 +3,8 @@ import logging -from .state_block import StateBlock from ..common.utils import isSpace +from .state_block import StateBlock LOGGER = logging.getLogger(__name__) diff --git a/markdown_it/rules_block/hr.py b/markdown_it/rules_block/hr.py index 01c68552..804cd9db 100644 --- a/markdown_it/rules_block/hr.py +++ b/markdown_it/rules_block/hr.py @@ -7,7 +7,6 @@ from ..common.utils import isSpace from .state_block import StateBlock - LOGGER = logging.getLogger(__name__) diff --git a/markdown_it/rules_block/html_block.py b/markdown_it/rules_block/html_block.py index 335d7cd0..31afab76 100644 --- a/markdown_it/rules_block/html_block.py +++ b/markdown_it/rules_block/html_block.py @@ -4,9 +4,9 @@ import logging import re -from .state_block import StateBlock from ..common.html_blocks import block_names from ..common.html_re import HTML_OPEN_CLOSE_TAG_STR +from .state_block import StateBlock LOGGER = logging.getLogger(__name__) diff --git a/markdown_it/rules_block/list.py b/markdown_it/rules_block/list.py index 59789350..a7617ad2 100644 --- a/markdown_it/rules_block/list.py +++ b/markdown_it/rules_block/list.py @@ -1,8 +1,8 @@ # Lists import logging -from .state_block import StateBlock from ..common.utils import isSpace +from .state_block import StateBlock LOGGER = logging.getLogger(__name__) diff --git a/markdown_it/rules_block/reference.py b/markdown_it/rules_block/reference.py index 1704d806..35adde2a 100644 --- a/markdown_it/rules_block/reference.py +++ b/markdown_it/rules_block/reference.py @@ -1,9 +1,8 @@ import logging -from ..common.utils import isSpace, normalizeReference, charCodeAt +from ..common.utils import charCodeAt, isSpace, normalizeReference from .state_block import StateBlock - LOGGER = logging.getLogger(__name__) diff --git a/markdown_it/rules_block/state_block.py b/markdown_it/rules_block/state_block.py index b2c71892..4b32219f 100644 --- a/markdown_it/rules_block/state_block.py +++ b/markdown_it/rules_block/state_block.py @@ -2,9 +2,9 @@ from typing import TYPE_CHECKING -from ..token import Token -from ..ruler import StateBase from ..common.utils import isSpace +from ..ruler import StateBase +from ..token import Token if TYPE_CHECKING: from markdown_it.main import MarkdownIt diff --git a/markdown_it/rules_block/table.py b/markdown_it/rules_block/table.py index 8c2c5927..e3db8584 100644 --- a/markdown_it/rules_block/table.py +++ b/markdown_it/rules_block/table.py @@ -1,9 +1,8 @@ # GFM table, https://github.github.com/gfm/#tables-extension- import re +from ..common.utils import charCodeAt, isSpace from .state_block import StateBlock -from ..common.utils import isSpace, charCodeAt - headerLineRe = re.compile(r"^:?-+:?$") enclosingPipesRe = re.compile(r"^\||\|$") diff --git a/markdown_it/rules_core/__init__.py b/markdown_it/rules_core/__init__.py index 7f5de3e4..f80034c5 100644 --- a/markdown_it/rules_core/__init__.py +++ b/markdown_it/rules_core/__init__.py @@ -8,10 +8,10 @@ "linkify", ) -from .state_core import StateCore -from .normalize import normalize from .block import block from .inline import inline +from .linkify import linkify +from .normalize import normalize from .replacements import replace from .smartquotes import smartquotes -from .linkify import linkify +from .state_core import StateCore diff --git a/markdown_it/rules_core/linkify.py b/markdown_it/rules_core/linkify.py index 0acc6f11..49bb4ef3 100644 --- a/markdown_it/rules_core/linkify.py +++ b/markdown_it/rules_core/linkify.py 
@@ -1,9 +1,8 @@ import re from ..common.utils import arrayReplaceAt -from .state_core import StateCore from ..token import Token - +from .state_core import StateCore LINK_OPEN_RE = re.compile(r"^\s]", flags=re.IGNORECASE) LINK_CLOSE_RE = re.compile(r"^", flags=re.IGNORECASE) diff --git a/markdown_it/rules_core/normalize.py b/markdown_it/rules_core/normalize.py index 14b2f679..bf16fd7a 100644 --- a/markdown_it/rules_core/normalize.py +++ b/markdown_it/rules_core/normalize.py @@ -3,7 +3,6 @@ from .state_core import StateCore - # https://spec.commonmark.org/0.29/#line-ending NEWLINES_RE = re.compile(r"\r\n?|\n") NULL_RE = re.compile(r"\0") diff --git a/markdown_it/rules_core/replacements.py b/markdown_it/rules_core/replacements.py index bced7026..45377d3e 100644 --- a/markdown_it/rules_core/replacements.py +++ b/markdown_it/rules_core/replacements.py @@ -19,8 +19,8 @@ import logging import re -from .state_core import StateCore from ..token import Token +from .state_core import StateCore LOGGER = logging.getLogger(__name__) diff --git a/markdown_it/rules_core/smartquotes.py b/markdown_it/rules_core/smartquotes.py index 7c297269..93f8be28 100644 --- a/markdown_it/rules_core/smartquotes.py +++ b/markdown_it/rules_core/smartquotes.py @@ -5,11 +5,9 @@ import re from typing import Any -from .state_core import StateCore -from ..common.utils import charCodeAt -from ..common.utils import isWhiteSpace, isPunctChar, isMdAsciiPunct +from ..common.utils import charCodeAt, isMdAsciiPunct, isPunctChar, isWhiteSpace from ..token import Token - +from .state_core import StateCore QUOTE_TEST_RE = re.compile(r"['\"]") QUOTE_RE = re.compile(r"['\"]") diff --git a/markdown_it/rules_core/state_core.py b/markdown_it/rules_core/state_core.py index 3521df2f..15b7c605 100644 --- a/markdown_it/rules_core/state_core.py +++ b/markdown_it/rules_core/state_core.py @@ -3,8 +3,8 @@ from collections.abc import MutableMapping from typing import TYPE_CHECKING -from ..token import Token from ..ruler import StateBase +from ..token import Token if TYPE_CHECKING: from markdown_it import MarkdownIt diff --git a/markdown_it/rules_inline/__init__.py b/markdown_it/rules_inline/__init__.py index 0cce406b..f27907ce 100644 --- a/markdown_it/rules_inline/__init__.py +++ b/markdown_it/rules_inline/__init__.py @@ -14,17 +14,16 @@ "html_inline", "strikethrough", ) -from .state_inline import StateInline -from .text import text -from .text_collapse import text_collapse +from . import emphasis, strikethrough +from .autolink import autolink +from .backticks import backtick from .balance_pairs import link_pairs +from .entity import entity from .escape import escape -from .newline import newline -from .backticks import backtick -from . import emphasis +from .html_inline import html_inline from .image import image from .link import link -from .autolink import autolink -from .entity import entity -from .html_inline import html_inline -from . 
import strikethrough +from .newline import newline +from .state_inline import StateInline +from .text import text +from .text_collapse import text_collapse diff --git a/markdown_it/rules_inline/autolink.py b/markdown_it/rules_inline/autolink.py index 6a55e49a..a4ee61c3 100644 --- a/markdown_it/rules_inline/autolink.py +++ b/markdown_it/rules_inline/autolink.py @@ -1,5 +1,6 @@ # Process autolinks '' import re + from .state_inline import StateInline EMAIL_RE = re.compile( diff --git a/markdown_it/rules_inline/emphasis.py b/markdown_it/rules_inline/emphasis.py index ef32c8d9..9001b09e 100644 --- a/markdown_it/rules_inline/emphasis.py +++ b/markdown_it/rules_inline/emphasis.py @@ -1,7 +1,7 @@ # Process *this* and _that_ # -from .state_inline import StateInline, Delimiter +from .state_inline import Delimiter, StateInline def tokenize(state: StateInline, silent: bool): diff --git a/markdown_it/rules_inline/entity.py b/markdown_it/rules_inline/entity.py index 8354e6c7..883a9666 100644 --- a/markdown_it/rules_inline/entity.py +++ b/markdown_it/rules_inline/entity.py @@ -2,7 +2,7 @@ import re from ..common.entities import entities -from ..common.utils import isValidEntityCode, fromCodePoint +from ..common.utils import fromCodePoint, isValidEntityCode from .state_inline import StateInline DIGITAL_RE = re.compile(r"^&#((?:x[a-f0-9]{1,6}|[0-9]{1,7}));", re.IGNORECASE) diff --git a/markdown_it/rules_inline/escape.py b/markdown_it/rules_inline/escape.py index 64d9a678..36bd0402 100644 --- a/markdown_it/rules_inline/escape.py +++ b/markdown_it/rules_inline/escape.py @@ -1,9 +1,8 @@ """ Process escaped chars and hardbreaks """ -from .state_inline import StateInline from ..common.utils import isSpace - +from .state_inline import StateInline ESCAPED = [0 for _ in range(256)] for ch in "\\!\"#$%&'()*+,./:;<=>?@[]^_`{|}~-": diff --git a/markdown_it/rules_inline/html_inline.py b/markdown_it/rules_inline/html_inline.py index 7333e370..295cc5c7 100644 --- a/markdown_it/rules_inline/html_inline.py +++ b/markdown_it/rules_inline/html_inline.py @@ -1,6 +1,6 @@ # Process html tags -from .state_inline import StateInline from ..common.html_re import HTML_TAG_RE +from .state_inline import StateInline def isLetter(ch: int): diff --git a/markdown_it/rules_inline/image.py b/markdown_it/rules_inline/image.py index eb3824b1..d2a08d47 100644 --- a/markdown_it/rules_inline/image.py +++ b/markdown_it/rules_inline/image.py @@ -1,9 +1,9 @@ # Process ![image]( "title") from __future__ import annotations -from .state_inline import StateInline -from ..token import Token from ..common.utils import isSpace, normalizeReference +from ..token import Token +from .state_inline import StateInline def image(state: StateInline, silent: bool): diff --git a/markdown_it/rules_inline/link.py b/markdown_it/rules_inline/link.py index 919ccf12..2394d6c3 100644 --- a/markdown_it/rules_inline/link.py +++ b/markdown_it/rules_inline/link.py @@ -1,6 +1,6 @@ # Process [link]( "stuff") -from ..common.utils import normalizeReference, isSpace +from ..common.utils import isSpace, normalizeReference from .state_inline import StateInline diff --git a/markdown_it/rules_inline/newline.py b/markdown_it/rules_inline/newline.py index b4b8a67f..3034e408 100644 --- a/markdown_it/rules_inline/newline.py +++ b/markdown_it/rules_inline/newline.py @@ -1,8 +1,8 @@ # Proceess '\n' import re -from .state_inline import StateInline from ..common.utils import charCodeAt, isSpace +from .state_inline import StateInline endSpace = re.compile(r" +$") diff --git 
a/markdown_it/rules_inline/state_inline.py b/markdown_it/rules_inline/state_inline.py index a6f72a92..430357d1 100644 --- a/markdown_it/rules_inline/state_inline.py +++ b/markdown_it/rules_inline/state_inline.py @@ -6,9 +6,9 @@ import attr -from ..token import Token +from ..common.utils import isMdAsciiPunct, isPunctChar, isWhiteSpace from ..ruler import StateBase -from ..common.utils import isWhiteSpace, isPunctChar, isMdAsciiPunct +from ..token import Token if TYPE_CHECKING: from markdown_it import MarkdownIt diff --git a/markdown_it/rules_inline/strikethrough.py b/markdown_it/rules_inline/strikethrough.py index fced7e32..107ea26b 100644 --- a/markdown_it/rules_inline/strikethrough.py +++ b/markdown_it/rules_inline/strikethrough.py @@ -1,7 +1,7 @@ # ~~strike through~~ from __future__ import annotations -from .state_inline import StateInline, Delimiter +from .state_inline import Delimiter, StateInline def tokenize(state: StateInline, silent: bool): diff --git a/markdown_it/rules_inline/text.py b/markdown_it/rules_inline/text.py index 0773a9d4..ec6ee0fa 100644 --- a/markdown_it/rules_inline/text.py +++ b/markdown_it/rules_inline/text.py @@ -3,7 +3,6 @@ from .state_inline import StateInline - # Rule to skip pure text # '{}$%@~+=:' reserved for extensions @@ -11,6 +10,8 @@ # !!!! Don't confuse with "Markdown ASCII Punctuation" chars # http://spec.commonmark.org/0.15/#ascii-punctuation-character + + def isTerminatorChar(ch): return ch in { 0x0A, # /* \n */: diff --git a/markdown_it/tree.py b/markdown_it/tree.py index b17103e5..09476b22 100644 --- a/markdown_it/tree.py +++ b/markdown_it/tree.py @@ -6,12 +6,7 @@ from collections.abc import Generator, Sequence import textwrap -from typing import ( - NamedTuple, - Any, - TypeVar, - overload, -) +from typing import Any, NamedTuple, TypeVar, overload from .token import Token from .utils import _removesuffix diff --git a/pyproject.toml b/pyproject.toml index 55106e49..1ef8304c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -80,6 +80,10 @@ exclude = [ "benchmarking/" ] +[tool.isort] +profile = "black" +force_sort_within_sections = true + [tool.mypy] show_error_codes = true warn_unused_ignores = true diff --git a/tests/test_api/test_token.py b/tests/test_api/test_token.py index df4a0390..a35616b2 100644 --- a/tests/test_api/test_token.py +++ b/tests/test_api/test_token.py @@ -1,6 +1,6 @@ import warnings -from markdown_it.token import Token, nest_tokens, NestedTokens +from markdown_it.token import NestedTokens, Token, nest_tokens def test_token(): diff --git a/tests/test_port/test_misc.py b/tests/test_port/test_misc.py index 8e7a5239..62b5bf85 100644 --- a/tests/test_port/test_misc.py +++ b/tests/test_port/test_misc.py @@ -1,5 +1,4 @@ -from markdown_it import MarkdownIt -from markdown_it import presets +from markdown_it import MarkdownIt, presets def test_highlight_arguments(): From ead951d952b451895323126cc45a6084d8e7011f Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 15 Apr 2022 00:57:56 +0200 Subject: [PATCH 06/97] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20REFACTOR:=20Move=20i?= =?UTF-8?q?nternal=20`Rule`/`Delimiter`=20to=20dataclass=20(#211)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- markdown_it/ruler.py | 13 ++++++------- markdown_it/rules_inline/state_inline.py | 21 ++++++++++----------- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/markdown_it/ruler.py b/markdown_it/ruler.py index 06576520..b46bac69 100644 --- a/markdown_it/ruler.py +++ b/markdown_it/ruler.py @@ -18,10 
+18,9 @@ class Ruler from __future__ import annotations from collections.abc import Callable, Iterable, MutableMapping +from dataclasses import dataclass, field from typing import TYPE_CHECKING -import attr - if TYPE_CHECKING: from markdown_it import MarkdownIt @@ -51,12 +50,12 @@ def src(self, value: str) -> None: RuleFunc = Callable -@attr.s(slots=True) +@dataclass() class Rule: - name: str = attr.ib() - enabled: bool = attr.ib() - fn: RuleFunc = attr.ib(repr=False) - alt: list[str] = attr.ib() + name: str + enabled: bool + fn: RuleFunc = field(repr=False) + alt: list[str] class Ruler: diff --git a/markdown_it/rules_inline/state_inline.py b/markdown_it/rules_inline/state_inline.py index 430357d1..442dac81 100644 --- a/markdown_it/rules_inline/state_inline.py +++ b/markdown_it/rules_inline/state_inline.py @@ -2,10 +2,9 @@ from collections import namedtuple from collections.abc import MutableMapping +from dataclasses import dataclass from typing import TYPE_CHECKING -import attr - from ..common.utils import isMdAsciiPunct, isPunctChar, isWhiteSpace from ..ruler import StateBase from ..token import Token @@ -14,13 +13,13 @@ from markdown_it import MarkdownIt -@attr.s(slots=True) +@dataclass() class Delimiter: # Char code of the starting marker (number). - marker: int = attr.ib() + marker: int # Total length of these series of delimiters. - length: int = attr.ib() + length: int # An amount of characters before this one that's equivalent to # current one. In plain English: if this delimiter does not open @@ -28,21 +27,21 @@ class Delimiter: # # Used to skip sequences like "*****" in one step, for 1st asterisk # value will be 0, for 2nd it's 1 and so on. - jump: int = attr.ib() + jump: int # A position of the token this delimiter corresponds to. - token: int = attr.ib() + token: int # If this delimiter is matched as a valid opener, `end` will be # equal to its position, otherwise it's `-1`. - end: int = attr.ib() + end: int # Boolean flags that determine if this delimiter could open or close # an emphasis. - open: bool = attr.ib() - close: bool = attr.ib() + open: bool + close: bool - level: bool = attr.ib(default=None) + level: bool | None = None Scanned = namedtuple("Scanned", ["can_open", "can_close", "length"]) From cbca541359b26a302c83862b32a027c1651736eb Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 15 Apr 2022 01:01:47 +0200 Subject: [PATCH 07/97] =?UTF-8?q?=E2=80=BC=EF=B8=8F=20Remove=20deorecated?= =?UTF-8?q?=20`NestedTokens`=20and=20`nest=5Ftokens`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Already replaced by `SyntaxTreeNode` --- markdown_it/token.py | 64 ------------------------------------ tests/test_api/test_token.py | 32 +----------------- 2 files changed, 1 insertion(+), 95 deletions(-) diff --git a/markdown_it/token.py b/markdown_it/token.py index 83cfb57b..1c16ca64 100644 --- a/markdown_it/token.py +++ b/markdown_it/token.py @@ -157,67 +157,3 @@ def from_dict(cls, dct: MutableMapping[str, Any]) -> Token: if token.children: token.children = [cls.from_dict(c) for c in token.children] # type: ignore[arg-type] return token - - -@attr.s(slots=True) -class NestedTokens: - """A class that closely resembles a Token, - but for a an opening/closing Token pair, and their containing children. 
- """ - - opening: Token = attr.ib() - closing: Token = attr.ib() - children: list[Token | NestedTokens] = attr.ib(factory=list) - - def __getattr__(self, name): - return getattr(self.opening, name) - - def attrGet(self, name: str) -> None | str | int | float: - """Get the value of attribute `name`, or null if it does not exist.""" - return self.opening.attrGet(name) - - -def nest_tokens(tokens: list[Token]) -> list[Token | NestedTokens]: - """Convert the token stream to a list of tokens and nested tokens. - - ``NestedTokens`` contain the open and close tokens and a list of children - of all tokens in between (recursively nested) - """ - warnings.warn( - "`markdown_it.token.nest_tokens` and `markdown_it.token.NestedTokens`" - " are deprecated. Please migrate to `markdown_it.tree.SyntaxTreeNode`", - DeprecationWarning, - ) - - output: list[Token | NestedTokens] = [] - - tokens = list(reversed(tokens)) - while tokens: - token = tokens.pop() - - if token.nesting == 0: - token = token.copy() - output.append(token) - if token.children: - # Ignore type checkers because `nest_tokens` doesn't respect - # typing of `Token.children`. We add `NestedTokens` into a - # `list[Token]` here. - token.children = nest_tokens(token.children) # type: ignore - continue - - assert token.nesting == 1, token.nesting - - nested_tokens = [token] - nesting = 1 - while tokens and nesting != 0: - token = tokens.pop() - nested_tokens.append(token) - nesting += token.nesting - if nesting != 0: - raise ValueError(f"unclosed tokens starting {nested_tokens[0]}") - - child = NestedTokens(nested_tokens[0], nested_tokens[-1]) - output.append(child) - child.children = nest_tokens(nested_tokens[1:-1]) - - return output diff --git a/tests/test_api/test_token.py b/tests/test_api/test_token.py index a35616b2..e3806b50 100644 --- a/tests/test_api/test_token.py +++ b/tests/test_api/test_token.py @@ -1,6 +1,6 @@ import warnings -from markdown_it.token import NestedTokens, Token, nest_tokens +from markdown_it.token import Token def test_token(): @@ -36,33 +36,3 @@ def test_token(): def test_serialization(): token = Token("name", "tag", 0, children=[Token("other", "tag2", 0)]) assert token == Token.from_dict(token.as_dict()) - - -def test_nest_tokens(): - tokens = nest_tokens( - [ - Token("start", "", 0), - Token("open", "", 1), - Token("open_inner", "", 1), - Token("inner", "", 0), - Token("close_inner", "", -1), - Token("close", "", -1), - Token("end", "", 0), - ] - ) - assert [t.type for t in tokens] == ["start", "open", "end"] - assert isinstance(tokens[0], Token) - assert isinstance(tokens[1], NestedTokens) - assert isinstance(tokens[2], Token) - - nested = tokens[1] - assert nested.opening.type == "open" - assert nested.closing.type == "close" - assert len(nested.children) == 1 - assert nested.children[0].type == "open_inner" - - nested2 = nested.children[0] - assert nested2.opening.type == "open_inner" - assert nested2.closing.type == "close_inner" - assert len(nested2.children) == 1 - assert nested2.children[0].type == "inner" From 6649229ed39e711d8ba9d8a29a1081897d0d864f Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 15 Apr 2022 02:07:57 +0200 Subject: [PATCH 08/97] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20REFACTOR:=20Move=20`?= =?UTF-8?q?Token`=20to=20`dataclass`=20(#212)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to remove `attrs` dependency. 
--- markdown_it/token.py | 112 +++++++++++++++++++++++++------------------ pyproject.toml | 2 +- 2 files changed, 67 insertions(+), 47 deletions(-) diff --git a/markdown_it/token.py b/markdown_it/token.py index 1c16ca64..b3c24728 100644 --- a/markdown_it/token.py +++ b/markdown_it/token.py @@ -1,11 +1,10 @@ from __future__ import annotations from collections.abc import Callable, MutableMapping +import dataclasses as dc from typing import Any import warnings -import attr - def convert_attrs(value: Any) -> Any: """Convert Token.attrs set as ``None`` or ``[[key, value], ...]`` to a dict. @@ -19,43 +18,65 @@ def convert_attrs(value: Any) -> Any: return value -@attr.s(slots=True) +@dc.dataclass() class Token: - # Type of the token (string, e.g. "paragraph_open") - type: str = attr.ib() - # html tag name, e.g. "p" - tag: str = attr.ib() - # Level change (number in {-1, 0, 1} set), where: - # - `1` means the tag is opening - # - `0` means the tag is self-closing - # - `-1` means the tag is closing - nesting: int = attr.ib() - # Html attributes. Note this differs from the upstream "list of lists" format - attrs: dict[str, str | int | float] = attr.ib(factory=dict, converter=convert_attrs) - # Source map info. Format: `[ line_begin, line_end ]` - map: list[int] | None = attr.ib(default=None) - # nesting level, the same as `state.level` - level: int = attr.ib(default=0) - # An array of child nodes (inline and img tokens) - children: list[Token] | None = attr.ib(default=None) - # In a case of self-closing tag (code, html, fence, etc.), - # it has contents of this tag. - content: str = attr.ib(default="") - # '*' or '_' for emphasis, fence string for fence, etc. - markup: str = attr.ib(default="") - # Additional information: - # - Info string for "fence" tokens - # - The value "auto" for autolink "link_open" and "link_close" tokens - # - The string value of the item marker for ordered-list "list_item_open" tokens - info: str = attr.ib(default="") - # A place for plugins to store any arbitrary data - meta: dict = attr.ib(factory=dict) - # True for block-level tokens, false for inline tokens. - # Used in renderer to calculate line breaks - block: bool = attr.ib(default=False) - # If it's true, ignore this element when rendering. - # Used for tight lists to hide paragraphs. - hidden: bool = attr.ib(default=False) + + type: str + """Type of the token (string, e.g. "paragraph_open")""" + + tag: str + """HTML tag name, e.g. 'p'""" + + nesting: int + """Level change (number in {-1, 0, 1} set), where: + - `1` means the tag is opening + - `0` means the tag is self-closing + - `-1` means the tag is closing + """ + + attrs: dict[str, str | int | float] = dc.field(default_factory=dict) + """HTML attributes. + Note this differs from the upstream "list of lists" format, + although than an instance can still be initialised with this format. + """ + + map: list[int] | None = None + """Source map info. 
Format: `[ line_begin, line_end ]`""" + + level: int = 0 + """Nesting level, the same as `state.level`""" + + children: list[Token] | None = None + """Array of child nodes (inline and img tokens).""" + + content: str = "" + """Inner content, in the case of a self-closing tag (code, html, fence, etc.),""" + + markup: str = "" + """'*' or '_' for emphasis, fence string for fence, etc.""" + + info: str = "" + """Additional information: + - Info string for "fence" tokens + - The value "auto" for autolink "link_open" and "link_close" tokens + - The string value of the item marker for ordered-list "list_item_open" tokens + """ + + meta: dict = dc.field(default_factory=dict) + """A place for plugins to store any arbitrary data""" + + block: bool = False + """True for block-level tokens, false for inline tokens. + Used in renderer to calculate line breaks + """ + + hidden: bool = False + """If true, ignore this element when rendering. + Used for tight lists to hide paragraphs. + """ + + def __post_init__(self): + self.attrs = convert_attrs(self.attrs) def attrIndex(self, name: str) -> int: warnings.warn( @@ -98,9 +119,9 @@ def attrJoin(self, name: str, value: str) -> None: else: self.attrs[name] = value - def copy(self) -> Token: + def copy(self, **changes: Any) -> Token: """Return a shallow copy of the instance.""" - return attr.evolve(self) + return dc.replace(self, **changes) def as_dict( self, @@ -108,7 +129,7 @@ def as_dict( children: bool = True, as_upstream: bool = True, meta_serializer: Callable[[dict], Any] | None = None, - filter: Callable[[attr.Attribute, Any], bool] | None = None, + filter: Callable[[str, Any], bool] | None = None, dict_factory: Callable[..., MutableMapping[str, Any]] = dict, ) -> MutableMapping[str, Any]: """Return the token as a dictionary. @@ -119,16 +140,15 @@ def as_dict( :param meta_serializer: hook for serializing ``Token.meta`` :param filter: A callable whose return code determines whether an attribute or element is included (``True``) or dropped (``False``). - Is called with the `attr.Attribute` as the first argument and the - value as the second argument. + Is called with the (key, value) pair. :param dict_factory: A callable to produce dictionaries from. For example, to produce ordered dictionaries instead of normal Python dictionaries, pass in ``collections.OrderedDict``. 
""" - mapping = attr.asdict( - self, recurse=False, filter=filter, dict_factory=dict_factory # type: ignore[arg-type] - ) + mapping = dict_factory((f.name, getattr(self, f.name)) for f in dc.fields(self)) + if filter: + mapping = dict_factory((k, v) for k, v in mapping.items() if filter(k, v)) if as_upstream and "attrs" in mapping: mapping["attrs"] = ( None diff --git a/pyproject.toml b/pyproject.toml index 1ef8304c..e0017185 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,6 @@ classifiers = [ keywords = ["markdown", "lexer", "parser", "commonmark", "markdown-it"] requires-python = ">=3.7" dependencies = [ - "attrs>=19,<22", "mdurl~=0.1", "typing_extensions>=3.7.4;python_version<'3.8'", ] @@ -47,6 +46,7 @@ compare = [ linkify = ["linkify-it-py~=1.0"] plugins = ["mdit-py-plugins"] rtd = [ + "attrs", "myst-parser", "pyyaml", "sphinx", From 8928b0e7e9752aa5687e7ea7acac5d6423585c36 Mon Sep 17 00:00:00 2001 From: Thibaud Colas Date: Fri, 15 Apr 2022 01:11:15 +0100 Subject: [PATCH 09/97] =?UTF-8?q?=F0=9F=93=9A=20DOCS:=20Update=20usage=20g?= =?UTF-8?q?uide=20to=20use=20PyPI=20package=20name=20(#202)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Chris Sewell --- docs/using.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/using.md b/docs/using.md index 5c977b5a..83872037 100644 --- a/docs/using.md +++ b/docs/using.md @@ -16,7 +16,7 @@ kernelspec: > This document can be opened to execute with [Jupytext](https://jupytext.readthedocs.io)! -markdown-it-py may be used as an API *via* the `markdown_it` package. +markdown-it-py may be used as an API *via* the [`markdown-it-py`](https://pypi.org/project/markdown-it-py/) package. The raw text is first parsed to syntax 'tokens', then these are converted to other formats using 'renderers'. From 2a67873c5d3d45dd78682e9384d964b4f8d528f9 Mon Sep 17 00:00:00 2001 From: Taneli Hukkinen <3275109+hukkin@users.noreply.github.com> Date: Fri, 15 Apr 2022 03:21:01 +0300 Subject: [PATCH 10/97] =?UTF-8?q?=F0=9F=90=9B=20FIX:=20Combination=20of=20?= =?UTF-8?q?blockquotes,=20list=20and=20newlines=20causes=20IndexError=20(#?= =?UTF-8?q?207)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- markdown_it/rules_block/blockquote.py | 2 -- markdown_it/rules_block/state_block.py | 3 +-- tests/test_port/fixtures/commonmark_extras.md | 21 +++++++++++++++++++ 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/markdown_it/rules_block/blockquote.py b/markdown_it/rules_block/blockquote.py index 7f72b896..6575731d 100644 --- a/markdown_it/rules_block/blockquote.py +++ b/markdown_it/rules_block/blockquote.py @@ -296,6 +296,4 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool): state.blkIndent = oldIndent - state.lineMax += 1 - return True diff --git a/markdown_it/rules_block/state_block.py b/markdown_it/rules_block/state_block.py index 4b32219f..42b8fce3 100644 --- a/markdown_it/rules_block/state_block.py +++ b/markdown_it/rules_block/state_block.py @@ -147,8 +147,7 @@ def skipEmptyLines(self, from_pos: int) -> int: ]: break except IndexError: - from_pos += 1 - break + pass from_pos += 1 return from_pos diff --git a/tests/test_port/fixtures/commonmark_extras.md b/tests/test_port/fixtures/commonmark_extras.md index c2df5084..168b039d 100644 --- a/tests/test_port/fixtures/commonmark_extras.md +++ b/tests/test_port/fixtures/commonmark_extras.md @@ -673,3 +673,24 @@ Issue #205. 
Space in link destination generates IndexError

[Contact](http:// mail.com)

[Contact](mailto: mail@mail.com)

. + +Issue #204. Combination of blockquotes, list and newlines causes an IndexError +. +> QUOTE ++ UNORDERED LIST ITEM + > INDENTED QUOTE + + + +. +
+

QUOTE

+
+
    +
  • UNORDERED LIST ITEM +
    +

    INDENTED QUOTE

    +
    +
  • +
+. From 145a484a94e3f1860f4feecf3a3fb706273dc50d Mon Sep 17 00:00:00 2001 From: Taneli Hukkinen <3275109+hukkin@users.noreply.github.com> Date: Sat, 16 Apr 2022 16:48:02 +0300 Subject: [PATCH 11/97] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20REFACTOR:=20=5F=5Fsl?= =?UTF-8?q?ots=5F=5F=20for=20dataclasses=20(#214)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- markdown_it/_compat.py | 10 ++++++++++ markdown_it/ruler.py | 4 +++- markdown_it/rules_inline/state_inline.py | 3 ++- markdown_it/token.py | 4 +++- 4 files changed, 18 insertions(+), 3 deletions(-) create mode 100644 markdown_it/_compat.py diff --git a/markdown_it/_compat.py b/markdown_it/_compat.py new file mode 100644 index 00000000..12df1aa6 --- /dev/null +++ b/markdown_it/_compat.py @@ -0,0 +1,10 @@ +from __future__ import annotations + +from collections.abc import Mapping +import sys +from typing import Any + +if sys.version_info >= (3, 10): + DATACLASS_KWARGS: Mapping[str, Any] = {"slots": True} +else: + DATACLASS_KWARGS: Mapping[str, Any] = {} diff --git a/markdown_it/ruler.py b/markdown_it/ruler.py index b46bac69..11b937a0 100644 --- a/markdown_it/ruler.py +++ b/markdown_it/ruler.py @@ -21,6 +21,8 @@ class Ruler from dataclasses import dataclass, field from typing import TYPE_CHECKING +from markdown_it._compat import DATACLASS_KWARGS + if TYPE_CHECKING: from markdown_it import MarkdownIt @@ -50,7 +52,7 @@ def src(self, value: str) -> None: RuleFunc = Callable -@dataclass() +@dataclass(**DATACLASS_KWARGS) class Rule: name: str enabled: bool diff --git a/markdown_it/rules_inline/state_inline.py b/markdown_it/rules_inline/state_inline.py index 442dac81..283532cc 100644 --- a/markdown_it/rules_inline/state_inline.py +++ b/markdown_it/rules_inline/state_inline.py @@ -5,6 +5,7 @@ from dataclasses import dataclass from typing import TYPE_CHECKING +from .._compat import DATACLASS_KWARGS from ..common.utils import isMdAsciiPunct, isPunctChar, isWhiteSpace from ..ruler import StateBase from ..token import Token @@ -13,7 +14,7 @@ from markdown_it import MarkdownIt -@dataclass() +@dataclass(**DATACLASS_KWARGS) class Delimiter: # Char code of the starting marker (number). marker: int diff --git a/markdown_it/token.py b/markdown_it/token.py index b3c24728..b20875b6 100644 --- a/markdown_it/token.py +++ b/markdown_it/token.py @@ -5,6 +5,8 @@ from typing import Any import warnings +from markdown_it._compat import DATACLASS_KWARGS + def convert_attrs(value: Any) -> Any: """Convert Token.attrs set as ``None`` or ``[[key, value], ...]`` to a dict. @@ -18,7 +20,7 @@ def convert_attrs(value: Any) -> Any: return value -@dc.dataclass() +@dc.dataclass(**DATACLASS_KWARGS) class Token: type: str From 7e677c4e7b4573eaf406a13882f3fee4b19b97f4 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Sat, 16 Apr 2022 15:51:17 +0200 Subject: [PATCH 12/97] =?UTF-8?q?=F0=9F=9A=80=20RELEASE:=20v2.1.0=20(#213)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 14 ++++++++++++++ markdown_it/__init__.py | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1e463bab..1aa4a725 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,19 @@ # Change Log +## 2.1.0 - 2022-04-15 + +This release is primarily to replace the `attrs` package dependency, +with the built-in Python `dataclasses` package. + +This should not be a breaking change, for most use cases. 
+ +- ⬆️ UPGRADE: Drop support for EOL Python 3.6 (#194) +- ♻️ REFACTOR: Move `Rule`/`Delimiter` classes from `attrs` to `dataclass` (#211) +- ♻️ REFACTOR: Move `Token` class from `attrs` to `dataclass` (#211) +- ‼️ Remove deprecated `NestedTokens` and `nest_tokens` +- ✨ NEW: Save ordered list numbering (#192) +- 🐛 FIX: Combination of blockquotes, list and newlines causes `IndexError` (#207) + ## 2.0.1 - 2022-24-01 - 🐛 FIX: Crash when file ends with empty blockquote line. diff --git a/markdown_it/__init__.py b/markdown_it/__init__.py index 887f4f53..5cc232a5 100644 --- a/markdown_it/__init__.py +++ b/markdown_it/__init__.py @@ -1,5 +1,5 @@ """A Python port of Markdown-It""" __all__ = ("MarkdownIt",) -__version__ = "2.0.1" +__version__ = "2.1.0" from .main import MarkdownIt From 53084e1ffa82323e37fe2d17a1b53d1dc66e5afd Mon Sep 17 00:00:00 2001 From: Taneli Hukkinen <3275109+hukkin@users.noreply.github.com> Date: Mon, 9 May 2022 02:10:18 +0300 Subject: [PATCH 13/97] =?UTF-8?q?=F0=9F=94=A7=20MAINTAIN:=20Update=20mypy'?= =?UTF-8?q?s=20additional=20dependencies=20(#217)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b9b3f0c0..fa85ca98 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -43,4 +43,4 @@ repos: rev: v0.942 hooks: - id: mypy - additional_dependencies: [attrs] + additional_dependencies: [mdurl] From 9be8e22c55a28a7408195ca4d88f8d69b4c7d7a7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 5 Jul 2022 01:09:19 +0200 Subject: [PATCH 14/97] [pre-commit.ci] pre-commit autoupdate (#215) --- .pre-commit-config.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fa85ca98..27170395 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,7 +16,7 @@ exclude: > repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.2.0 + rev: v4.3.0 hooks: - id: check-json - id: check-yaml @@ -29,18 +29,18 @@ repos: - id: isort - repo: https://github.com/psf/black - rev: 22.3.0 + rev: 22.6.0 hooks: - id: black - - repo: https://gitlab.com/pycqa/flake8 - rev: 3.9.2 + - repo: https://github.com/PyCQA/flake8 + rev: 4.0.1 hooks: - id: flake8 additional_dependencies: [flake8-bugbear==21.3.1] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.942 + rev: v0.961 hooks: - id: mypy additional_dependencies: [mdurl] From 9f642d0a542894115180da09820caa044b879f56 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 2 Aug 2022 23:06:47 +0200 Subject: [PATCH 15/97] [pre-commit.ci] pre-commit autoupdate (#220) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Chris Sewell --- .pre-commit-config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 27170395..2cda2619 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -34,13 +34,13 @@ repos: - id: black - repo: https://github.com/PyCQA/flake8 - rev: 4.0.1 + rev: 5.0.2 hooks: - id: flake8 - additional_dependencies: [flake8-bugbear==21.3.1] + additional_dependencies: [flake8-bugbear~=22.7] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.961 + rev: 
v0.971 hooks: - id: mypy additional_dependencies: [mdurl] From 4e8947419b87f5d80fd46367e63ed2aa03307a46 Mon Sep 17 00:00:00 2001 From: Jakub Wilk Date: Tue, 24 Jan 2023 20:21:28 +0100 Subject: [PATCH 16/97] =?UTF-8?q?=F0=9F=93=9A=20DOCS:=20Fix=20typo=20(#230?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/using.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/using.md b/docs/using.md index 83872037..6bf09e46 100644 --- a/docs/using.md +++ b/docs/using.md @@ -330,7 +330,7 @@ md = MarkdownIt("commonmark", renderer_cls=MyRenderer) md.render("*a*") ``` -Plugins can support multiple render types, using the `__ouput__` attribute (this is currently a Python only feature). +Plugins can support multiple render types, using the `__output__` attribute (this is currently a Python only feature). ```{code-cell} python from markdown_it.renderer import RendererHTML From 9740e23579d13c0e78f8d8c4458bc96b2c2674c6 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Wed, 22 Feb 2023 04:55:38 +0100 Subject: [PATCH 17/97] =?UTF-8?q?=F0=9F=94=A7=20pre-commit=20autoupdate?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 10 +++++----- markdown_it/common/utils.py | 1 - markdown_it/helpers/parse_link_label.py | 1 - markdown_it/renderer.py | 3 +-- markdown_it/rules_block/blockquote.py | 2 -- markdown_it/rules_block/code.py | 1 - markdown_it/rules_block/fence.py | 1 - markdown_it/rules_block/heading.py | 1 - markdown_it/rules_block/hr.py | 1 - markdown_it/rules_block/lheading.py | 1 - markdown_it/rules_block/list.py | 4 ---- markdown_it/rules_block/paragraph.py | 1 - markdown_it/rules_block/reference.py | 1 - markdown_it/rules_block/state_block.py | 1 - markdown_it/rules_core/block.py | 1 - markdown_it/rules_core/normalize.py | 1 - markdown_it/rules_core/smartquotes.py | 1 - markdown_it/rules_inline/autolink.py | 1 - markdown_it/rules_inline/backticks.py | 1 - markdown_it/rules_inline/balance_pairs.py | 2 -- markdown_it/rules_inline/emphasis.py | 1 - markdown_it/rules_inline/entity.py | 1 - markdown_it/rules_inline/html_inline.py | 1 - markdown_it/rules_inline/image.py | 1 - markdown_it/rules_inline/link.py | 1 - markdown_it/rules_inline/strikethrough.py | 2 -- markdown_it/token.py | 1 - tests/test_port/test_references.py | 1 - 28 files changed, 6 insertions(+), 39 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2cda2619..50e8132a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,7 +16,7 @@ exclude: > repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.3.0 + rev: v4.4.0 hooks: - id: check-json - id: check-yaml @@ -24,23 +24,23 @@ repos: - id: trailing-whitespace - repo: https://github.com/pycqa/isort - rev: 5.10.1 + rev: 5.12.0 hooks: - id: isort - repo: https://github.com/psf/black - rev: 22.6.0 + rev: 23.1.0 hooks: - id: black - repo: https://github.com/PyCQA/flake8 - rev: 5.0.2 + rev: 6.0.0 hooks: - id: flake8 additional_dependencies: [flake8-bugbear~=22.7] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.971 + rev: v1.0.1 hooks: - id: mypy additional_dependencies: [mdurl] diff --git a/markdown_it/common/utils.py b/markdown_it/common/utils.py index edc24ca5..9b7c4aeb 100644 --- a/markdown_it/common/utils.py +++ b/markdown_it/common/utils.py @@ -56,7 +56,6 @@ def arrayReplaceAt(src: list, pos: int, newElements: list) -> list: def isValidEntityCode(c: int) -> bool: - # 
broken sequence if c >= 0xD800 and c <= 0xDFFF: return False diff --git a/markdown_it/helpers/parse_link_label.py b/markdown_it/helpers/parse_link_label.py index 20e3c148..6ce8daf8 100644 --- a/markdown_it/helpers/parse_link_label.py +++ b/markdown_it/helpers/parse_link_label.py @@ -9,7 +9,6 @@ def parseLinkLabel(state: StateInline, start: int, disableNested: bool = False) -> int: - labelEnd = -1 oldPos = state.pos found = False diff --git a/markdown_it/renderer.py b/markdown_it/renderer.py index b8bfe4da..aa6272a3 100644 --- a/markdown_it/renderer.py +++ b/markdown_it/renderer.py @@ -18,7 +18,7 @@ class Renderer try: from typing import Protocol except ImportError: # Python <3.8 doesn't have `Protocol` in the stdlib - from typing_extensions import Protocol # type: ignore[misc] + from typing_extensions import Protocol # type: ignore class RendererProtocol(Protocol): @@ -82,7 +82,6 @@ def render( result = "" for i, token in enumerate(tokens): - if token.type == "inline": assert token.children is not None result += self.renderInline(token.children, options, env) diff --git a/markdown_it/rules_block/blockquote.py b/markdown_it/rules_block/blockquote.py index 6575731d..e00fbf61 100644 --- a/markdown_it/rules_block/blockquote.py +++ b/markdown_it/rules_block/blockquote.py @@ -10,7 +10,6 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool): - LOGGER.debug( "entering blockquote: %s, %s, %s, %s", state, startLine, endLine, silent ) @@ -129,7 +128,6 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool): # for (nextLine = startLine + 1; nextLine < endLine; nextLine++) { nextLine = startLine + 1 while nextLine < endLine: - # check if it's outdented, i.e. it's inside list item and indented # less than said list item: # diff --git a/markdown_it/rules_block/code.py b/markdown_it/rules_block/code.py index c4fdba33..a796608d 100644 --- a/markdown_it/rules_block/code.py +++ b/markdown_it/rules_block/code.py @@ -7,7 +7,6 @@ def code(state: StateBlock, startLine: int, endLine: int, silent: bool = False): - LOGGER.debug("entering code: %s, %s, %s, %s", state, startLine, endLine, silent) if state.sCount[startLine] - state.blkIndent < 4: diff --git a/markdown_it/rules_block/fence.py b/markdown_it/rules_block/fence.py index c4f5275d..fb3c6847 100644 --- a/markdown_it/rules_block/fence.py +++ b/markdown_it/rules_block/fence.py @@ -7,7 +7,6 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool): - LOGGER.debug("entering fence: %s, %s, %s, %s", state, startLine, endLine, silent) haveEndMarker = False diff --git a/markdown_it/rules_block/heading.py b/markdown_it/rules_block/heading.py index 8d4ef3e2..064d0702 100644 --- a/markdown_it/rules_block/heading.py +++ b/markdown_it/rules_block/heading.py @@ -10,7 +10,6 @@ def heading(state: StateBlock, startLine: int, endLine: int, silent: bool): - LOGGER.debug("entering heading: %s, %s, %s, %s", state, startLine, endLine, silent) pos = state.bMarks[startLine] + state.tShift[startLine] diff --git a/markdown_it/rules_block/hr.py b/markdown_it/rules_block/hr.py index 804cd9db..22c69722 100644 --- a/markdown_it/rules_block/hr.py +++ b/markdown_it/rules_block/hr.py @@ -11,7 +11,6 @@ def hr(state: StateBlock, startLine: int, endLine: int, silent: bool): - LOGGER.debug("entering hr: %s, %s, %s, %s", state, startLine, endLine, silent) pos = state.bMarks[startLine] + state.tShift[startLine] diff --git a/markdown_it/rules_block/lheading.py b/markdown_it/rules_block/lheading.py index f26e2af0..92632acc 100644 
--- a/markdown_it/rules_block/lheading.py +++ b/markdown_it/rules_block/lheading.py @@ -8,7 +8,6 @@ def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool): - LOGGER.debug("entering lheading: %s, %s, %s, %s", state, startLine, endLine, silent) level = None diff --git a/markdown_it/rules_block/list.py b/markdown_it/rules_block/list.py index a7617ad2..9cf8c402 100644 --- a/markdown_it/rules_block/list.py +++ b/markdown_it/rules_block/list.py @@ -10,7 +10,6 @@ # Search `[-+*][\n ]`, returns next pos after marker on success # or -1 on fail. def skipBulletListMarker(state: StateBlock, startLine: int): - pos = state.bMarks[startLine] + state.tShift[startLine] maximum = state.eMarks[startLine] @@ -33,7 +32,6 @@ def skipBulletListMarker(state: StateBlock, startLine: int): # Search `\d+[.)][\n ]`, returns next pos after marker on success # or -1 on fail. def skipOrderedListMarker(state: StateBlock, startLine: int): - start = state.bMarks[startLine] + state.tShift[startLine] pos = start maximum = state.eMarks[startLine] @@ -59,7 +57,6 @@ def skipOrderedListMarker(state: StateBlock, startLine: int): # /* 0 */ /* 9 */ if ch >= 0x30 and ch <= 0x39: - # List marker should have no more than 9 digits # (prevents integer overflow in browsers) if pos - start >= 10: @@ -97,7 +94,6 @@ def markTightParagraphs(state: StateBlock, idx: int): def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool): - LOGGER.debug("entering list: %s, %s, %s, %s", state, startLine, endLine, silent) isTerminatingParagraph = False diff --git a/markdown_it/rules_block/paragraph.py b/markdown_it/rules_block/paragraph.py index 4fee83e9..fef7edf7 100644 --- a/markdown_it/rules_block/paragraph.py +++ b/markdown_it/rules_block/paragraph.py @@ -8,7 +8,6 @@ def paragraph(state: StateBlock, startLine: int, endLine: int, silent: bool = False): - LOGGER.debug( "entering paragraph: %s, %s, %s, %s", state, startLine, endLine, silent ) diff --git a/markdown_it/rules_block/reference.py b/markdown_it/rules_block/reference.py index 35adde2a..39e21eb6 100644 --- a/markdown_it/rules_block/reference.py +++ b/markdown_it/rules_block/reference.py @@ -7,7 +7,6 @@ def reference(state: StateBlock, startLine, _endLine, silent): - LOGGER.debug( "entering reference: %s, %s, %s, %s", state, startLine, _endLine, silent ) diff --git a/markdown_it/rules_block/state_block.py b/markdown_it/rules_block/state_block.py index 42b8fce3..c5589149 100644 --- a/markdown_it/rules_block/state_block.py +++ b/markdown_it/rules_block/state_block.py @@ -19,7 +19,6 @@ def __init__( tokens: list[Token], srcCharCode: tuple[int, ...] 
| None = None, ): - if srcCharCode is not None: self._src = src self.srcCharCode = srcCharCode diff --git a/markdown_it/rules_core/block.py b/markdown_it/rules_core/block.py index fa1c52c4..dc756418 100644 --- a/markdown_it/rules_core/block.py +++ b/markdown_it/rules_core/block.py @@ -3,7 +3,6 @@ def block(state: StateCore) -> None: - if state.inlineMode: token = Token("inline", "", 0) token.content = state.src diff --git a/markdown_it/rules_core/normalize.py b/markdown_it/rules_core/normalize.py index bf16fd7a..c9f8d0d5 100644 --- a/markdown_it/rules_core/normalize.py +++ b/markdown_it/rules_core/normalize.py @@ -9,7 +9,6 @@ def normalize(state: StateCore) -> None: - # Normalize newlines string = NEWLINES_RE.sub("\n", state.src) diff --git a/markdown_it/rules_core/smartquotes.py b/markdown_it/rules_core/smartquotes.py index 93f8be28..7a39fad4 100644 --- a/markdown_it/rules_core/smartquotes.py +++ b/markdown_it/rules_core/smartquotes.py @@ -195,7 +195,6 @@ def smartquotes(state: StateCore) -> None: return for token in state.tokens: - if token.type != "inline" or not QUOTE_RE.search(token.content): continue assert token.children is not None diff --git a/markdown_it/rules_inline/autolink.py b/markdown_it/rules_inline/autolink.py index a4ee61c3..11ac5905 100644 --- a/markdown_it/rules_inline/autolink.py +++ b/markdown_it/rules_inline/autolink.py @@ -10,7 +10,6 @@ def autolink(state: StateInline, silent: bool) -> bool: - pos = state.pos if state.srcCharCode[pos] != 0x3C: # /* < */ diff --git a/markdown_it/rules_inline/backticks.py b/markdown_it/rules_inline/backticks.py index 7bff12fe..5f1e0552 100644 --- a/markdown_it/rules_inline/backticks.py +++ b/markdown_it/rules_inline/backticks.py @@ -7,7 +7,6 @@ def backtick(state: StateInline, silent: bool) -> bool: - pos = state.pos ch = state.srcCharCode[pos] diff --git a/markdown_it/rules_inline/balance_pairs.py b/markdown_it/rules_inline/balance_pairs.py index db622f07..5423b5d6 100644 --- a/markdown_it/rules_inline/balance_pairs.py +++ b/markdown_it/rules_inline/balance_pairs.py @@ -4,7 +4,6 @@ def processDelimiters(state: StateInline, delimiters, *args): - openersBottom = {} maximum = len(delimiters) @@ -50,7 +49,6 @@ def processDelimiters(state: StateInline, delimiters, *args): continue if opener.open and opener.end < 0: - isOddMatch = False # from spec: diff --git a/markdown_it/rules_inline/emphasis.py b/markdown_it/rules_inline/emphasis.py index 9001b09e..5262430b 100644 --- a/markdown_it/rules_inline/emphasis.py +++ b/markdown_it/rules_inline/emphasis.py @@ -39,7 +39,6 @@ def tokenize(state: StateInline, silent: bool): def _postProcess(state, delimiters): - i = len(delimiters) - 1 while i >= 0: startDelim = delimiters[i] diff --git a/markdown_it/rules_inline/entity.py b/markdown_it/rules_inline/entity.py index 883a9666..08d271ed 100644 --- a/markdown_it/rules_inline/entity.py +++ b/markdown_it/rules_inline/entity.py @@ -10,7 +10,6 @@ def entity(state: StateInline, silent: bool): - pos = state.pos maximum = state.posMax diff --git a/markdown_it/rules_inline/html_inline.py b/markdown_it/rules_inline/html_inline.py index 295cc5c7..b875e884 100644 --- a/markdown_it/rules_inline/html_inline.py +++ b/markdown_it/rules_inline/html_inline.py @@ -10,7 +10,6 @@ def isLetter(ch: int): def html_inline(state: StateInline, silent: bool): - pos = state.pos if not state.md.options.get("html", None): diff --git a/markdown_it/rules_inline/image.py b/markdown_it/rules_inline/image.py index d2a08d47..d7215bdf 100644 --- a/markdown_it/rules_inline/image.py +++ 
b/markdown_it/rules_inline/image.py @@ -7,7 +7,6 @@ def image(state: StateInline, silent: bool): - label = None href = "" oldPos = state.pos diff --git a/markdown_it/rules_inline/link.py b/markdown_it/rules_inline/link.py index 2394d6c3..a6345152 100644 --- a/markdown_it/rules_inline/link.py +++ b/markdown_it/rules_inline/link.py @@ -5,7 +5,6 @@ def link(state: StateInline, silent: bool): - href = "" title = "" label = None diff --git a/markdown_it/rules_inline/strikethrough.py b/markdown_it/rules_inline/strikethrough.py index 107ea26b..9b062a66 100644 --- a/markdown_it/rules_inline/strikethrough.py +++ b/markdown_it/rules_inline/strikethrough.py @@ -53,7 +53,6 @@ def tokenize(state: StateInline, silent: bool): def _postProcess(state: StateInline, delimiters: list[Delimiter]): - loneMarkers = [] maximum = len(delimiters) @@ -89,7 +88,6 @@ def _postProcess(state: StateInline, delimiters: list[Delimiter]): state.tokens[endDelim.token - 1].type == "text" and state.tokens[endDelim.token - 1].content == "~" ): - loneMarkers.append(endDelim.token - 1) i += 1 diff --git a/markdown_it/token.py b/markdown_it/token.py index b20875b6..7a41a784 100644 --- a/markdown_it/token.py +++ b/markdown_it/token.py @@ -22,7 +22,6 @@ def convert_attrs(value: Any) -> Any: @dc.dataclass(**DATACLASS_KWARGS) class Token: - type: str """Type of the token (string, e.g. "paragraph_open")""" diff --git a/tests/test_port/test_references.py b/tests/test_port/test_references.py index 32e389de..75bf7130 100644 --- a/tests/test_port/test_references.py +++ b/tests/test_port/test_references.py @@ -2,7 +2,6 @@ def test_ref_definitions(): - md = MarkdownIt() src = "[a]: abc\n\n[b]: xyz\n\n[b]: ijk" env = {} From b6682498a7790cc16707e891e9b890dbcb6e3250 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Wed, 22 Feb 2023 04:57:45 +0100 Subject: [PATCH 18/97] =?UTF-8?q?=F0=9F=A7=AA=20TESTS=20unpin=20pytest-ben?= =?UTF-8?q?chmark?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e0017185..4c8a169a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,7 +63,7 @@ testing = [ benchmarking = [ "psutil", "pytest", - "pytest-benchmark~=3.2", + "pytest-benchmark", ] profiling = ["gprof2dot"] From 23f22bc1bc878e96f6d194b589d9dcdc44dc2e45 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Wed, 22 Feb 2023 05:00:28 +0100 Subject: [PATCH 19/97] =?UTF-8?q?=F0=9F=94=A7=20Add=20dependabot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/dependabot.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..786be571 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,19 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. 
+# Please see the documentation for all configuration options: +# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 +updates: + - package-ecosystem: github-actions + directory: / + commit-message: + prefix: ⬆️ + schedule: + interval: weekly + - package-ecosystem: pip + directory: / + commit-message: + prefix: ⬆️ + schedule: + interval: weekly From f83c969f7c44e033ec7398c38c49990f36962210 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 22 Feb 2023 05:08:13 +0100 Subject: [PATCH 20/97] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20Bump=20r-lib/actions?= =?UTF-8?q?=20from=201=20to=202=20(#243)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/benchmark.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index ef5780b3..abf3f620 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -17,7 +17,7 @@ jobs: python-version: 3.8 - name: install pandoc - uses: r-lib/actions/setup-pandoc@v1 + uses: r-lib/actions/setup-pandoc@v2 with: pandoc-version: '2.6' From 2160a3daec2a7a007e5a7f5f941eaaad001a2d95 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Wed, 22 Feb 2023 05:16:34 +0100 Subject: [PATCH 21/97] =?UTF-8?q?=F0=9F=94=A7=20Bump=20GH=20actions=20(#24?= =?UTF-8?q?4)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/benchmark.yml | 2 +- .github/workflows/tests.yml | 16 ++++++++-------- pyproject.toml | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index abf3f620..c2b82d33 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -12,7 +12,7 @@ jobs: - uses: actions/checkout@v2 - name: Set up Python 3.8 - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: 3.8 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index e46c0847..64a068e3 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -21,10 +21,10 @@ jobs: steps: - uses: actions/checkout@v2 - name: Set up Python 3.8 - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: '3.8' - - uses: pre-commit/action@v2.0.0 + - uses: pre-commit/action@v3.0.0 tests: @@ -36,7 +36,7 @@ jobs: steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Install dependencies @@ -48,7 +48,7 @@ jobs: pytest tests/ --cov=markdown_it --cov-report=xml --cov-report=term-missing - name: Upload to Codecov if: matrix.python-version == '3.7' && github.repository == 'executablebooks/markdown-it-py' - uses: codecov/codecov-action@v1 + uses: codecov/codecov-action@v3 with: name: markdown-it-py-pytests-py3.7 flags: pytests @@ -64,7 +64,7 @@ jobs: steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Install markdown-it-py @@ -85,7 +85,7 @@ jobs: - uses: actions/checkout@v2 - name: Set up Python 3.8 - uses: 
actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: '3.8' @@ -98,7 +98,7 @@ jobs: run: tox -e py38-bench-core -- --benchmark-json bench-core.json - name: Upload data - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: bench-core path: bench-core.json @@ -114,7 +114,7 @@ jobs: - name: Checkout source uses: actions/checkout@v2 - name: Set up Python 3.8 - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: '3.8' - name: install flit diff --git a/pyproject.toml b/pyproject.toml index 4c8a169a..9a491e8b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,7 @@ Homepage = "https://github.com/executablebooks/markdown-it-py" Documentation = "https://markdown-it-py.readthedocs.io/" [project.optional-dependencies] -code_style = ["pre-commit==2.6"] +code_style = ["pre-commit~=3.0"] compare = [ "commonmark~=0.9.1", "markdown~=3.3.6", From c9f6856dcc3f5f73ce01571dd280d6139b0c1185 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Wed, 22 Feb 2023 05:22:38 +0100 Subject: [PATCH 22/97] =?UTF-8?q?=F0=9F=94=A7=20Update=20benchmark=20pkg?= =?UTF-8?q?=20versions=20(#245)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9a491e8b..76bb25f3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,11 +37,11 @@ Documentation = "https://markdown-it-py.readthedocs.io/" [project.optional-dependencies] code_style = ["pre-commit~=3.0"] compare = [ - "commonmark~=0.9.1", - "markdown~=3.3.6", - "mistletoe~=0.8.1", - "mistune~=2.0.2", - "panflute~=2.1.3", + "commonmark~=0.9", + "markdown~=3.4", + "mistletoe~=1.0", + "mistune~=2.0", + "panflute~=2.3", ] linkify = ["linkify-it-py~=1.0"] plugins = ["mdit-py-plugins"] From 032c742671c4d6ad12ecd5cd072164e2f3812c12 Mon Sep 17 00:00:00 2001 From: holamgadol Date: Wed, 22 Feb 2023 07:34:12 +0300 Subject: [PATCH 23/97] =?UTF-8?q?=F0=9F=93=9A=20DOCS:=20Add=20section=20ab?= =?UTF-8?q?out=20markdown=20renderer=20(#227)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Chris Sewell --- docs/using.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/docs/using.md b/docs/using.md index 6bf09e46..aa632574 100644 --- a/docs/using.md +++ b/docs/using.md @@ -397,3 +397,29 @@ md = MarkdownIt("commonmark") md.add_render_rule("link_open", render_blank_link) print(md.render("[a]\n\n[a]: b")) ``` + +### Markdown renderer + +You can also render a token stream directly to markdown via the `MDRenderer` class from [`mdformat`](https://github.com/executablebooks/mdformat): + +```{code-cell} python +from markdown_it import MarkdownIt +from mdformat.renderer import MDRenderer + +md = MarkdownIt("commonmark") + +source_markdown = """ +Here's some *text* + +1. 
a list + +> a *quote*""" + +tokens = md.parse(source_markdown) + +renderer = MDRenderer() +options = {} +env = {} + +output_markdown = renderer.render(tokens, options, env) +``` From 4670f0cdd7a9e8ab7523f51b0beb1d4ea27bb1b7 Mon Sep 17 00:00:00 2001 From: Taneli Hukkinen <3275109+hukkin@users.noreply.github.com> Date: Wed, 22 Feb 2023 06:36:36 +0200 Subject: [PATCH 24/97] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20UPGRADE:=20Allow=20l?= =?UTF-8?q?inkify-it-py=20v2=20(#218)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Chris Sewell --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 76bb25f3..5ac1ad27 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,7 @@ compare = [ "mistune~=2.0", "panflute~=2.3", ] -linkify = ["linkify-it-py~=1.0"] +linkify = ["linkify-it-py>=1,<3"] plugins = ["mdit-py-plugins"] rtd = [ "attrs", From 2c93e0b6a8aec7e5a6e1bdef502de7d95ec2a192 Mon Sep 17 00:00:00 2001 From: redstoneleo Date: Wed, 22 Feb 2023 12:41:25 +0800 Subject: [PATCH 25/97] =?UTF-8?q?=F0=9F=93=9A=20DOCS:=20Update=20the=20exa?= =?UTF-8?q?mple=20(#229)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Chris Sewell --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 599c1f9f..789588fe 100644 --- a/README.md +++ b/README.md @@ -55,10 +55,9 @@ from mdit_py_plugins.front_matter import front_matter_plugin from mdit_py_plugins.footnote import footnote_plugin md = ( - MarkdownIt() + MarkdownIt('commonmark' ,{'breaks':True,'html':True}) .use(front_matter_plugin) .use(footnote_plugin) - .disable('image') .enable('table') ) text = (""" From ae03c6107dfa18e648f6fdd1280f5b89092d5d49 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Wed, 22 Feb 2023 05:56:39 +0100 Subject: [PATCH 26/97] =?UTF-8?q?=F0=9F=90=9B=20FIX:=20CVE-2023-26303=20(#?= =?UTF-8?q?246)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix unnecessary asserts, leading to crashes --- markdown_it/renderer.py | 20 ++++++++------------ markdown_it/rules_core/replacements.py | 3 ++- markdown_it/rules_core/smartquotes.py | 4 ++-- tests/test_port/fixtures/issue-fixes.md | 9 +++++++++ tests/test_port/test_fixtures.py | 1 + 5 files changed, 22 insertions(+), 15 deletions(-) diff --git a/markdown_it/renderer.py b/markdown_it/renderer.py index aa6272a3..81d0bc37 100644 --- a/markdown_it/renderer.py +++ b/markdown_it/renderer.py @@ -83,8 +83,8 @@ def render( for i, token in enumerate(tokens): if token.type == "inline": - assert token.children is not None - result += self.renderInline(token.children, options, env) + if token.children: + result += self.renderInline(token.children, options, env) elif token.type in self.rules: result += self.rules[token.type](tokens, i, options, env) else: @@ -206,8 +206,8 @@ def renderInlineAsText( if token.type == "text": result += token.content elif token.type == "image": - assert token.children is not None - result += self.renderInlineAsText(token.children, options, env) + if token.children: + result += self.renderInlineAsText(token.children, options, env) elif token.type == "softbreak": result += "\n" @@ -305,14 +305,10 @@ def image( # "alt" attr MUST be set, even if empty. Because it's mandatory and # should be placed on proper position for tests. 
- - assert ( - token.attrs and "alt" in token.attrs - ), '"image" token\'s attrs must contain `alt`' - - # Replace content with actual value - - token.attrSet("alt", self.renderInlineAsText(token.children, options, env)) + if token.children: + token.attrSet("alt", self.renderInlineAsText(token.children, options, env)) + else: + token.attrSet("alt", "") return self.renderToken(tokens, idx, options, env) diff --git a/markdown_it/rules_core/replacements.py b/markdown_it/rules_core/replacements.py index 45377d3e..5e9b7ae7 100644 --- a/markdown_it/rules_core/replacements.py +++ b/markdown_it/rules_core/replacements.py @@ -116,7 +116,8 @@ def replace(state: StateCore) -> None: for token in state.tokens: if token.type != "inline": continue - assert token.children is not None + if token.children is None: + continue if SCOPED_ABBR_RE.search(token.content): replace_scoped(token.children) diff --git a/markdown_it/rules_core/smartquotes.py b/markdown_it/rules_core/smartquotes.py index 7a39fad4..b11a5739 100644 --- a/markdown_it/rules_core/smartquotes.py +++ b/markdown_it/rules_core/smartquotes.py @@ -197,5 +197,5 @@ def smartquotes(state: StateCore) -> None: for token in state.tokens: if token.type != "inline" or not QUOTE_RE.search(token.content): continue - assert token.children is not None - process_inlines(token.children, state) + if token.children is not None: + process_inlines(token.children, state) diff --git a/tests/test_port/fixtures/issue-fixes.md b/tests/test_port/fixtures/issue-fixes.md index 0c693b04..319945af 100644 --- a/tests/test_port/fixtures/issue-fixes.md +++ b/tests/test_port/fixtures/issue-fixes.md @@ -36,3 +36,12 @@ .

💬

. + +Fix CVE-2023-26303 +. +![![]() +]([) +. +


+

+. diff --git a/tests/test_port/test_fixtures.py b/tests/test_port/test_fixtures.py index 5117c5e1..d2199caf 100644 --- a/tests/test_port/test_fixtures.py +++ b/tests/test_port/test_fixtures.py @@ -111,4 +111,5 @@ def test_strikethrough(line, title, input, expected): def test_issue_fixes(line, title, input, expected): md = MarkdownIt() text = md.render(input) + print(text) assert text.rstrip() == expected.rstrip() From cf8b2d8c561233d4c18c55e80b68c8d06850fda6 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Wed, 22 Feb 2023 06:32:13 +0100 Subject: [PATCH 27/97] =?UTF-8?q?=F0=9F=94=A7=20Create=20SECURITY.md=20(#2?= =?UTF-8?q?48)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- SECURITY.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 SECURITY.md diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 00000000..aaa878f4 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,14 @@ +# Security Policy + +## Supported Versions + +We generally only support the latest major release, +although critical bug fixes can be released for older versions. + +## Reporting a Vulnerability + +To report a security issue, please email with a description of the issue, +the steps you took to create the issue, affected versions, and, if known, mitigations for the issue. +Our team will respond within 3 working days of your email. +If the issue is confirmed as a vulnerability, we will open a Security Advisory. +This project follows a 90 day disclosure timeline. From 6491bc2491a07a8072e5d40f27eab6430585c42c Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Wed, 22 Feb 2023 06:35:57 +0100 Subject: [PATCH 28/97] =?UTF-8?q?=F0=9F=93=9A=20Add=20email=20to=20securit?= =?UTF-8?q?y=20section=20(#249)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/other.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/other.md b/docs/other.md index 4d77360f..cfc5ba8c 100644 --- a/docs/other.md +++ b/docs/other.md @@ -21,7 +21,7 @@ for XSS: So, by default `markdown-it` should be safe. We care about it. -If you find a security problem - contact us via tracker or email. +If you find a security problem - contact us via . Such reports are fixed with top priority. ## Plugins From 53ca3e9c2b9e9b295f6abf7f4ad2730a9b70f68c Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Wed, 22 Feb 2023 06:39:56 +0100 Subject: [PATCH 29/97] =?UTF-8?q?=F0=9F=90=9B=20FIX:=20CLI=20crash=20on=20?= =?UTF-8?q?non-utf8=20character=20(#247)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses CVE-2023-26302 --- markdown_it/cli/parse.py | 2 +- tests/test_cli.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/markdown_it/cli/parse.py b/markdown_it/cli/parse.py index 2d74f55a..890d5de3 100644 --- a/markdown_it/cli/parse.py +++ b/markdown_it/cli/parse.py @@ -35,7 +35,7 @@ def convert_file(filename: str) -> None: Parse a Markdown file and dump the output to stdout. 
""" try: - with open(filename, "r") as fin: + with open(filename, "r", encoding="utf8", errors="ignore") as fin: rendered = MarkdownIt().render(fin.read()) print(rendered, end="") except OSError: diff --git a/tests/test_cli.py b/tests/test_cli.py index 57d6b938..c38e24fd 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -20,6 +20,13 @@ def test_parse_fail(): assert exc_info.value.code == 1 +def test_non_utf8(): + with tempfile.TemporaryDirectory() as tempdir: + path = pathlib.Path(tempdir).joinpath("test.md") + path.write_bytes(b"\x80abc") + assert parse.main([str(path)]) == 0 + + def test_print_heading(): with patch("builtins.print") as patched: parse.print_heading() From 73a01479212bfe2aea0b995b4d13c8ddca2e4265 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Wed, 22 Feb 2023 06:52:12 +0100 Subject: [PATCH 30/97] =?UTF-8?q?=F0=9F=9A=80=20RELEASE:=20v2.2.0=20(#250)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 25 +++++++++++++++++++++++++ markdown_it/__init__.py | 2 +- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1aa4a725..fa8dc6b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,30 @@ # Change Log +## 2.2.0 - 2023-02-22 + +### What's Changed + +* ⬆️ UPGRADE: Allow linkify-it-py v2 by @hukkin in [#218](https://github.com/executablebooks/markdown-it-py/pull/218) +* 🐛 FIX: CVE-2023-26303 by @chrisjsewell in [#246](https://github.com/executablebooks/markdown-it-py/pull/246) +* 🐛 FIX: CLI crash on non-utf8 character by @chrisjsewell in [#247](https://github.com/executablebooks/markdown-it-py/pull/247) +* 📚 DOCS: Update the example by @redstoneleo in [#229](https://github.com/executablebooks/markdown-it-py/pull/229) +* 📚 DOCS: Add section about markdown renderer by @holamgadol in [#227](https://github.com/executablebooks/markdown-it-py/pull/227) +* 🔧 Create SECURITY.md by @chrisjsewell in [#248](https://github.com/executablebooks/markdown-it-py/pull/248) +* 🔧 MAINTAIN: Update mypy's additional dependencies by @hukkin in [#217](https://github.com/executablebooks/markdown-it-py/pull/217) +* Fix typo by @jwilk in [#230](https://github.com/executablebooks/markdown-it-py/pull/230) +* 🔧 Bump GH actions by @chrisjsewell in [#244](https://github.com/executablebooks/markdown-it-py/pull/244) +* 🔧 Update benchmark pkg versions by @chrisjsewell in [#245](https://github.com/executablebooks/markdown-it-py/pull/245) + +### New Contributors + +Thanks to 🎉 + +* @jwilk made their first contribution in [#230](https://github.com/executablebooks/markdown-it-py/pull/230) +* @holamgadol made their first contribution in [#227](https://github.com/executablebooks/markdown-it-py/pull/227) +* @redstoneleo made their first contribution in [#229](https://github.com/executablebooks/markdown-it-py/pull/229) + +**Full Changelog**: + ## 2.1.0 - 2022-04-15 This release is primarily to replace the `attrs` package dependency, diff --git a/markdown_it/__init__.py b/markdown_it/__init__.py index 5cc232a5..882a0c3e 100644 --- a/markdown_it/__init__.py +++ b/markdown_it/__init__.py @@ -1,5 +1,5 @@ """A Python port of Markdown-It""" __all__ = ("MarkdownIt",) -__version__ = "2.1.0" +__version__ = "2.2.0" from .main import MarkdownIt From 34876b1aa42746fbb73a64ce1a18237fb610dd43 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Wed, 22 Feb 2023 07:09:18 +0100 Subject: [PATCH 31/97] =?UTF-8?q?=F0=9F=A7=AA=20Add=20Python=203.11=20CI?= =?UTF-8?q?=20(#251)?= MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/tests.yml | 2 +- pyproject.toml | 1 + tox.ini | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 64a068e3..4ec6da14 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -31,7 +31,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['pypy-3.7', '3.7', '3.8', '3.9', '3.10'] + python-version: ['pypy-3.7', '3.7', '3.8', '3.9', '3.10', '3.11'] steps: - uses: actions/checkout@v2 diff --git a/pyproject.toml b/pyproject.toml index 5ac1ad27..5cc879ed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ classifiers = [ "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", "Topic :: Software Development :: Libraries :: Python Modules", diff --git a/tox.ini b/tox.ini index f4e117e2..85a7179b 100644 --- a/tox.ini +++ b/tox.ini @@ -9,13 +9,13 @@ envlist = py37 [testenv] usedevelop = true -[testenv:py{37,38,39,310}] +[testenv:py{37,38,39,310,311}] extras = linkify testing commands = pytest {posargs:tests/} -[testenv:py{37,38,39,310}-plugins] +[testenv:py{37,38,39,310,311}-plugins] extras = testing changedir = {envtmpdir} allowlist_externals = From 07e9b7d5a62903e0e95f681b8173bc0d7871abd4 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Mon, 13 Mar 2023 22:12:58 -0600 Subject: [PATCH 32/97] =?UTF-8?q?=F0=9F=A7=AA=20Fix=20fuzzing=20test=20fai?= =?UTF-8?q?lures=20(#254)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From https://github.com/google/oss-fuzz/tree/master/projects/markdown-it-py, fixes issues 55363 and 55367 --- markdown_it/rules_block/blockquote.py | 5 ++++- markdown_it/rules_block/fence.py | 7 +++++-- markdown_it/rules_block/hr.py | 5 ++++- markdown_it/rules_block/list.py | 5 ++++- tests/test_fuzzer.py | 24 ++++++++++++++++++++++++ 5 files changed, 41 insertions(+), 5 deletions(-) create mode 100644 tests/test_fuzzer.py diff --git a/markdown_it/rules_block/blockquote.py b/markdown_it/rules_block/blockquote.py index e00fbf61..965a9e73 100644 --- a/markdown_it/rules_block/blockquote.py +++ b/markdown_it/rules_block/blockquote.py @@ -23,7 +23,10 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool): return False # check the block quote marker - if state.srcCharCode[pos] != 0x3E: # /* > */ + try: + if state.srcCharCode[pos] != 0x3E: # /* > */ + return False + except IndexError: return False pos += 1 diff --git a/markdown_it/rules_block/fence.py b/markdown_it/rules_block/fence.py index fb3c6847..53bc6f2d 100644 --- a/markdown_it/rules_block/fence.py +++ b/markdown_it/rules_block/fence.py @@ -66,8 +66,11 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool): # test break - if state.srcCharCode[pos] != marker: - continue + try: + if state.srcCharCode[pos] != marker: + continue + except IndexError: + break if state.sCount[nextLine] - state.blkIndent >= 4: # closing fence should be indented less than 4 spaces diff --git a/markdown_it/rules_block/hr.py b/markdown_it/rules_block/hr.py index 22c69722..953bba23 100644 --- a/markdown_it/rules_block/hr.py +++ b/markdown_it/rules_block/hr.py @@ -20,7 +20,10 @@ def hr(state: StateBlock, startLine: int, endLine: int, silent: 
bool): if state.sCount[startLine] - state.blkIndent >= 4: return False - marker = state.srcCharCode[pos] + try: + marker = state.srcCharCode[pos] + except IndexError: + return False pos += 1 # Check hr marker: /* * */ /* - */ /* _ */ diff --git a/markdown_it/rules_block/list.py b/markdown_it/rules_block/list.py index 9cf8c402..d9c5e554 100644 --- a/markdown_it/rules_block/list.py +++ b/markdown_it/rules_block/list.py @@ -13,7 +13,10 @@ def skipBulletListMarker(state: StateBlock, startLine: int): pos = state.bMarks[startLine] + state.tShift[startLine] maximum = state.eMarks[startLine] - marker = state.srcCharCode[pos] + try: + marker = state.srcCharCode[pos] + except IndexError: + return -1 pos += 1 # Check bullet /* * */ /* - */ /* + */ if marker != 0x2A and marker != 0x2D and marker != 0x2B: diff --git a/tests/test_fuzzer.py b/tests/test_fuzzer.py new file mode 100644 index 00000000..60cdddaa --- /dev/null +++ b/tests/test_fuzzer.py @@ -0,0 +1,24 @@ +""" +These tests are in response to reports from: +https://github.com/google/oss-fuzz/tree/master/projects/markdown-it-py + +In the future, perhaps atheris could be directly used here, +but it was not directly apparent how to integrate it into pytest. +""" +import pytest + +from markdown_it import MarkdownIt + +TESTS = { + 55363: ">```\n>", + 55367: ">-\n>\n>", + # 55371: "[](so»0;!" TODO this did not fail + # 55401: "?c_" * 100_000 TODO this did not fail +} + + +@pytest.mark.parametrize("raw_input", TESTS.values(), ids=TESTS.keys()) +def test_fuzzing(raw_input): + md = MarkdownIt() + md.parse(raw_input) + print(md.render(raw_input)) From e7172489f82b05670674707e23749c2b7ca74ca3 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Sat, 25 Mar 2023 22:56:03 +0100 Subject: [PATCH 33/97] =?UTF-8?q?=F0=9F=94=A7=20Minor=20type=20annotation?= =?UTF-8?q?=20fix=20(#259)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- markdown_it/_compat.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/markdown_it/_compat.py b/markdown_it/_compat.py index 12df1aa6..974d431b 100644 --- a/markdown_it/_compat.py +++ b/markdown_it/_compat.py @@ -4,7 +4,8 @@ import sys from typing import Any +DATACLASS_KWARGS: Mapping[str, Any] if sys.version_info >= (3, 10): - DATACLASS_KWARGS: Mapping[str, Any] = {"slots": True} + DATACLASS_KWARGS = {"slots": True} else: - DATACLASS_KWARGS: Mapping[str, Any] = {} + DATACLASS_KWARGS = {} From 2d46a43bd7a55310452b33f9ed2801f7b5cc4dca Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Tue, 25 Apr 2023 17:16:42 +0200 Subject: [PATCH 34/97] =?UTF-8?q?=F0=9F=A7=AA=20CI:=20Add=20fuzzing=20work?= =?UTF-8?q?flow=20for=20PRs=20(#262)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This action runs fuzzing for a brief period of time, only against the actual code added in the PR. It is intended as a relatively quick check, to guard against code introducing crashes in the Markdown parsing, which should in principle always run against any text input. 
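For orientation only — the real harness is maintained in the google/oss-fuzz repository and is not part of this change — a fuzz target of this kind could be sketched roughly as below; the use of `atheris` and the way the input bytes are consumed are assumptions here, not the exact OSS-Fuzz setup:

```python
# Minimal fuzz-harness sketch (assumed layout; the actual OSS-Fuzz harness may differ).
import sys

import atheris

with atheris.instrument_imports():
    from markdown_it import MarkdownIt


def TestOneInput(data: bytes) -> None:
    fdp = atheris.FuzzedDataProvider(data)
    text = fdp.ConsumeUnicodeNoSurrogates(len(data))
    # Rendering arbitrary text should never raise; it should only produce tokens/HTML.
    MarkdownIt().render(text)


if __name__ == "__main__":
    atheris.Setup(sys.argv, TestOneInput)
    atheris.Fuzz()
```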
See: https://google.github.io/oss-fuzz/getting-started/continuous-integration/#how-it-works --- .github/workflows/fuzz.yml | 34 ++++++++++++++++++++++++++++++++++ .github/workflows/tests.yml | 1 + 2 files changed, 35 insertions(+) create mode 100644 .github/workflows/fuzz.yml diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml new file mode 100644 index 00000000..7df6dc6b --- /dev/null +++ b/.github/workflows/fuzz.yml @@ -0,0 +1,34 @@ +name: fuzzing + +# This action runs fuzzing for a brief period of time, +# only aginst the actual code added in the PR. +# It is intended a relatively quick check, +# to guard against code introducing crashes in the Markdown parsing, +# which should in principle always run against any text input. +# See: https://google.github.io/oss-fuzz/getting-started/continuous-integration/#how-it-works + +on: + pull_request: + +jobs: + Fuzzing: + runs-on: ubuntu-latest + steps: + - name: Build Fuzzers + id: build + uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master + with: + oss-fuzz-project-name: 'markdown-it-py' + language: python + - name: Run Fuzzers + uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master + with: + oss-fuzz-project-name: 'markdown-it-py' + language: python + fuzz-seconds: 60 + - name: Upload Crash + uses: actions/upload-artifact@v3 + if: failure() && steps.build.outcome == 'success' + with: + name: artifacts + path: ./out/artifacts diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 4ec6da14..a82b59bc 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -54,6 +54,7 @@ jobs: flags: pytests file: ./coverage.xml fail_ci_if_error: true + token: ${{ secrets.CODECOV_TOKEN }} test-plugins: From 84f7be164b555584670281ec214d3cf5efb2f2d0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 25 Apr 2023 18:19:16 +0200 Subject: [PATCH 35/97] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20Bump=20actions/check?= =?UTF-8?q?out=20from=202=20to=203=20(#252)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Chris Sewell --- .github/workflows/benchmark.yml | 2 +- .github/workflows/tests.yml | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index c2b82d33..68cc69bb 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python 3.8 uses: actions/setup-python@v4 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a82b59bc..8f1cd70b 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -19,7 +19,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python 3.8 uses: actions/setup-python@v4 with: @@ -34,7 +34,7 @@ jobs: python-version: ['pypy-3.7', '3.7', '3.8', '3.9', '3.10', '3.11'] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: @@ -63,7 +63,7 @@ jobs: matrix: python-version: ['3.8'] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: @@ -83,7 +83,7 
@@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python 3.8 uses: actions/setup-python@v4 @@ -113,7 +113,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout source - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Set up Python 3.8 uses: actions/setup-python@v4 with: From 505909532975bf0b701dc1b55668df63c28199f9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 25 Apr 2023 18:26:31 +0200 Subject: [PATCH 36/97] [pre-commit.ci] pre-commit autoupdate (#221) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Chris Sewell --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 50e8132a..139ce525 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,7 +29,7 @@ repos: - id: isort - repo: https://github.com/psf/black - rev: 23.1.0 + rev: 23.3.0 hooks: - id: black @@ -40,7 +40,7 @@ repos: additional_dependencies: [flake8-bugbear~=22.7] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.0.1 + rev: v1.2.0 hooks: - id: mypy additional_dependencies: [mdurl] From d1852a5f8e2cd33602d670cf602593e664c7a2a7 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Tue, 25 Apr 2023 19:42:26 +0200 Subject: [PATCH 37/97] =?UTF-8?q?=F0=9F=94=A7=20Add=20tox=20env=20for=20fu?= =?UTF-8?q?zz=20testcase=20run=20(#263)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To reproduce failing test cases reported by Google's OSS-Fuzz runs --- .github/workflows/fuzz.yml | 2 ++ docs/conf.py | 2 +- scripts/build_fuzzers.py | 42 ++++++++++++++++++++++++++++++ profiler.py => scripts/profiler.py | 0 tox.ini | 9 ++++++- 5 files changed, 53 insertions(+), 2 deletions(-) create mode 100644 scripts/build_fuzzers.py rename profiler.py => scripts/profiler.py (100%) diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml index 7df6dc6b..a74869a5 100644 --- a/.github/workflows/fuzz.yml +++ b/.github/workflows/fuzz.yml @@ -6,9 +6,11 @@ name: fuzzing # to guard against code introducing crashes in the Markdown parsing, # which should in principle always run against any text input. 
# See: https://google.github.io/oss-fuzz/getting-started/continuous-integration/#how-it-works +# Note, to reproduce a crash locally, copy to `testcase` file` and run: `tox -e fuzz` on: pull_request: + paths-ignore: ['docs/**', 'tests/**'] jobs: Fuzzing: diff --git a/docs/conf.py b/docs/conf.py index 786eff04..52deea47 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -102,7 +102,7 @@ def run_apidoc(app): this_folder = os.path.abspath(os.path.dirname(os.path.realpath(__file__))) api_folder = os.path.join(this_folder, "api") module_path = os.path.normpath(os.path.join(this_folder, "../")) - ignore_paths = ["../profiler.py", "../conftest.py", "../tests", "../benchmarking"] + ignore_paths = ["../scripts", "../conftest.py", "../tests", "../benchmarking"] ignore_paths = [ os.path.normpath(os.path.join(this_folder, p)) for p in ignore_paths ] diff --git a/scripts/build_fuzzers.py b/scripts/build_fuzzers.py new file mode 100644 index 00000000..3dce8ddf --- /dev/null +++ b/scripts/build_fuzzers.py @@ -0,0 +1,42 @@ +"""Build fuzzers idempotently in a given folder.""" +import argparse +from pathlib import Path +import subprocess + + +def main(): + """Build fuzzers idempotently in a given folder.""" + parser = argparse.ArgumentParser() + parser.add_argument("folder") + args = parser.parse_args() + folder = Path(args.folder) + if not folder.exists(): + print(f"Cloning google/oss-fuzz into: {folder}") + folder.mkdir(parents=True) + subprocess.check_call( + [ + "git", + "clone", + "--single-branch", + "https://github.com/google/oss-fuzz", + str(folder), + ] + ) + else: + print(f"Using google/oss-fuzz in: {folder}") + if not (folder / "build").exists(): + print(f"Building fuzzers in: {folder / 'build'}") + subprocess.check_call( + [ + "python", + str(folder / "infra" / "helper.py"), + "build_fuzzers", + "markdown-it-py", + ] + ) + else: + print(f"Using existing fuzzers in: {folder / 'build'}") + + +if __name__ == "__main__": + main() diff --git a/profiler.py b/scripts/profiler.py similarity index 100% rename from profiler.py rename to scripts/profiler.py diff --git a/tox.ini b/tox.ini index 85a7179b..bf0c8367 100644 --- a/tox.ini +++ b/tox.ini @@ -55,11 +55,18 @@ allowlist_externals = dot commands = mkdir -p "{toxworkdir}/prof" - python -m cProfile -o "{toxworkdir}/prof/output.pstats" profiler.py + python -m cProfile -o "{toxworkdir}/prof/output.pstats" scripts/profiler.py gprof2dot -f pstats -o "{toxworkdir}/prof/output.dot" "{toxworkdir}/prof/output.pstats" dot -Tsvg -o "{toxworkdir}/prof/output.svg" "{toxworkdir}/prof/output.dot" python -c 'import pathlib; print("profiler svg output under file://\{0\}".format(pathlib.Path(r"{toxworkdir}") / "prof" / "output.svg"))' +[testenv:fuzz] +description = run fuzzer on testcase file +; See: https://google.github.io/oss-fuzz/ +deps = atheris +commands_pre = python scripts/build_fuzzers.py {envdir}/oss-fuzz +commands = python {envdir}/oss-fuzz/infra/helper.py reproduce markdown-it-py fuzz_markdown {posargs:testcase} + [flake8] max-line-length = 100 extend-ignore = E203 From baa8658d0c7d8eceec17b2384109a63ad27bfc05 Mon Sep 17 00:00:00 2001 From: DavidKorczynski Date: Wed, 31 May 2023 18:56:44 +0100 Subject: [PATCH 38/97] =?UTF-8?q?=F0=9F=A7=AA=20Add=20OSS-Fuzz=20set=20up?= =?UTF-8?q?=20(#255)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Chris Sewell --- tests/fuzz/README.md | 41 +++++++++++++++++++++ 
tests/fuzz/fuzz_markdown.py | 23 ++++++++++++ tests/fuzz/fuzz_markdown_extended.py | 53 ++++++++++++++++++++++++++++ 3 files changed, 117 insertions(+) create mode 100644 tests/fuzz/README.md create mode 100644 tests/fuzz/fuzz_markdown.py create mode 100644 tests/fuzz/fuzz_markdown_extended.py diff --git a/tests/fuzz/README.md b/tests/fuzz/README.md new file mode 100644 index 00000000..87075a70 --- /dev/null +++ b/tests/fuzz/README.md @@ -0,0 +1,41 @@ +# OSS-Fuzz integration + +In principle, core Markdown parsing is designed to never except/crash on any input, +and so [fuzzing](https://en.wikipedia.org/wiki/Fuzzing) can be used to test this conformance. +This folder contains fuzzers which are principally run downstream as part of the infrastructure. + +Any file that matches `fuzz_*.py` in this repository will be built and run on OSS-Fuzz +(see ). + +See for full details. + +## CI integration + +Fuzzing essentially runs forever, or until a crash is found, therefore it cannot be fully integrated into local continous integration testing. +The workflow in `.github/workflows/fuzz.yml` though runs a brief fuzzing on code changed in a PR, +which can be used to provide early warning on code changes. + +## Reproducing crash failures + +If OSS-Fuzz (or the CI workflow) identifies a crash, it will produce a "minimized testcase" file +(e.g. ). + +To reproduce this crash locally, the easiest way is to run the [tox](https://tox.wiki/) environment, provided in this repository, against the test file (see `tox.ini`): + +``` +tox -e fuzz path/to/testcase +``` + +This idempotently sets up a local python environment with markdown-it-py (local dev) and [Atheris](https://pypi.org/project/atheris/) installed, +clones into it, +and builds the fuzzers. +Then the testcase is run within this environment. 
+ +If you wish to simply run the full fuzzing process, +you can activate this environment, then run e.g.: + +``` +python .tox/fuzz/oss-fuzz/infra/helper.py run_fuzzer markdown-it-py fuzz_markdown +``` + +For a more thorough guide on reproducing, see: https://google.github.io/oss-fuzz/advanced-topics/reproducing/ diff --git a/tests/fuzz/fuzz_markdown.py b/tests/fuzz/fuzz_markdown.py new file mode 100644 index 00000000..d78ef697 --- /dev/null +++ b/tests/fuzz/fuzz_markdown.py @@ -0,0 +1,23 @@ +import sys + +import atheris + +from markdown_it import MarkdownIt + + +def TestOneInput(data): + fdp = atheris.FuzzedDataProvider(data) + md = MarkdownIt() + raw_markdown = fdp.ConsumeUnicodeNoSurrogates(sys.maxsize) + md.parse(raw_markdown) + md.render(raw_markdown) + + +def main(): + atheris.instrument_all() + atheris.Setup(sys.argv, TestOneInput) + atheris.Fuzz() + + +if __name__ == "__main__": + main() diff --git a/tests/fuzz/fuzz_markdown_extended.py b/tests/fuzz/fuzz_markdown_extended.py new file mode 100644 index 00000000..4ba749ee --- /dev/null +++ b/tests/fuzz/fuzz_markdown_extended.py @@ -0,0 +1,53 @@ +import sys + +import atheris + +# Beautified from auto-generated fuzzer at: +# https://github.com/ossf/fuzz-introspector/pull/872#issuecomment-1450847118 +# Auto-fuzz heuristics used: py-autofuzz-heuristics-4.1 +# Imports by the generated code +import markdown_it + + +def TestOneInput(data): + fdp = atheris.FuzzedDataProvider(data) + val_1 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 1024)) + val_2 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 1024)) + val_3 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) + val_4 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) + val_5 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) + val_6 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) + val_7 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) + val_8 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) + val_9 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) + val_10 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) + + try: + c1 = markdown_it.main.MarkdownIt() + c1.render(val_1) + c1.parse(val_2) + c1.renderInline(val_3) + c1.parseInline(val_4) + c1.normalizeLink(val_5) + c1.normalizeLinkText(val_6) + c1.disable(val_7) + c1.enable(val_8) + c1.validateLink(val_9) + c1.configure(val_10) + except ( + ValueError, + KeyError, + TypeError, + ): + # Exceptions thrown by the hit code. 
+ pass + + +def main(): + atheris.instrument_all() + atheris.Setup(sys.argv, TestOneInput) + atheris.Fuzz() + + +if __name__ == "__main__": + main() From 1ea54577f0b3883a822f778064b6cb35708a4e1b Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Wed, 31 May 2023 20:03:53 +0200 Subject: [PATCH 39/97] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20Drop=20Python=203.7?= =?UTF-8?q?=20(#264)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Python 3.7 is end-of-life on 2023-06-27 https://devguide.python.org/versions/ --- .github/workflows/tests.yml | 6 +++--- .readthedocs.yml | 2 +- docs/conf.py | 2 +- markdown_it/renderer.py | 7 +------ pyproject.toml | 4 +--- tox.ini | 10 +++++----- 6 files changed, 12 insertions(+), 19 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 8f1cd70b..ef1bf557 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -31,7 +31,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['pypy-3.7', '3.7', '3.8', '3.9', '3.10', '3.11'] + python-version: ['pypy-3.8', '3.8', '3.9', '3.10', '3.11'] steps: - uses: actions/checkout@v3 @@ -47,10 +47,10 @@ jobs: run: | pytest tests/ --cov=markdown_it --cov-report=xml --cov-report=term-missing - name: Upload to Codecov - if: matrix.python-version == '3.7' && github.repository == 'executablebooks/markdown-it-py' + if: matrix.python-version == '3.8' && github.repository == 'executablebooks/markdown-it-py' uses: codecov/codecov-action@v3 with: - name: markdown-it-py-pytests-py3.7 + name: markdown-it-py-pytests flags: pytests file: ./coverage.xml fail_ci_if_error: true diff --git a/.readthedocs.yml b/.readthedocs.yml index 32b74c8f..1d750008 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -1,7 +1,7 @@ version: 2 python: - version: 3 + version: "3.8" install: - method: pip path: . 
diff --git a/docs/conf.py b/docs/conf.py index 52deea47..08a6e78a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -80,7 +80,7 @@ intersphinx_mapping = { - "python": ("https://docs.python.org/3.7", None), + "python": ("https://docs.python.org/3.8", None), "mdit-py-plugins": ("https://mdit-py-plugins.readthedocs.io/en/latest/", None), } diff --git a/markdown_it/renderer.py b/markdown_it/renderer.py index 81d0bc37..2d784826 100644 --- a/markdown_it/renderer.py +++ b/markdown_it/renderer.py @@ -9,17 +9,12 @@ class Renderer from collections.abc import MutableMapping, Sequence import inspect -from typing import Any, ClassVar +from typing import Any, ClassVar, Protocol from .common.utils import escapeHtml, unescapeAll from .token import Token from .utils import OptionsDict -try: - from typing import Protocol -except ImportError: # Python <3.8 doesn't have `Protocol` in the stdlib - from typing_extensions import Protocol # type: ignore - class RendererProtocol(Protocol): __output__: ClassVar[str] diff --git a/pyproject.toml b/pyproject.toml index 5cc879ed..da8d9170 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,6 @@ classifiers = [ "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", @@ -25,10 +24,9 @@ classifiers = [ "Topic :: Text Processing :: Markup", ] keywords = ["markdown", "lexer", "parser", "commonmark", "markdown-it"] -requires-python = ">=3.7" +requires-python = ">=3.8" dependencies = [ "mdurl~=0.1", - "typing_extensions>=3.7.4;python_version<'3.8'", ] [project.urls] diff --git a/tox.ini b/tox.ini index bf0c8367..251e18df 100644 --- a/tox.ini +++ b/tox.ini @@ -4,18 +4,18 @@ # then run `tox` or `tox -- {pytest args}` # run in parallel using `tox -p` [tox] -envlist = py37 +envlist = py38 [testenv] usedevelop = true -[testenv:py{37,38,39,310,311}] +[testenv:py{38,39,310,311}] extras = linkify testing commands = pytest {posargs:tests/} -[testenv:py{37,38,39,310,311}-plugins] +[testenv:py{38,39,310,311}-plugins] extras = testing changedir = {envtmpdir} allowlist_externals = @@ -27,11 +27,11 @@ commands_pre = commands = pytest {posargs} -[testenv:py{37,38,39}-bench-core] +[testenv:py{38,39,310,311}-bench-core] extras = benchmarking commands = pytest benchmarking/bench_core.py {posargs} -[testenv:py{37,38}-bench-packages] +[testenv:py{38,39,310,311}-bench-packages] extras = benchmarking,compare commands = pytest benchmarking/bench_packages.py {posargs} From 83d66d4fb2f6161125f7302596edb4cc6b15e163 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Wed, 31 May 2023 20:58:46 +0200 Subject: [PATCH 40/97] =?UTF-8?q?=F0=9F=94=A7=20MAINTAIN:=20Make=20type=20?= =?UTF-8?q?checking=20strict=20(#267)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit and introduce `TypeDict` allowed by Python 3.8+ --- .pre-commit-config.yaml | 6 + docs/conf.py | 23 +-- markdown_it/_punycode.py | 5 +- markdown_it/common/normalize_url.py | 2 +- markdown_it/common/utils.py | 31 +--- markdown_it/helpers/parse_link_destination.py | 2 +- markdown_it/helpers/parse_link_title.py | 4 +- markdown_it/main.py | 54 +++++-- markdown_it/parser_block.py | 27 ++-- markdown_it/parser_core.py | 2 +- markdown_it/parser_inline.py | 12 +- markdown_it/presets/__init__.py | 3 +- markdown_it/presets/commonmark.py | 3 +- 
markdown_it/presets/default.py | 3 +- markdown_it/presets/zero.py | 3 +- markdown_it/renderer.py | 42 +++--- markdown_it/ruler.py | 56 +++++-- markdown_it/rules_block/blockquote.py | 2 +- markdown_it/rules_block/code.py | 2 +- markdown_it/rules_block/fence.py | 2 +- markdown_it/rules_block/heading.py | 2 +- markdown_it/rules_block/hr.py | 2 +- markdown_it/rules_block/html_block.py | 4 +- markdown_it/rules_block/lheading.py | 2 +- markdown_it/rules_block/list.py | 8 +- markdown_it/rules_block/paragraph.py | 2 +- markdown_it/rules_block/reference.py | 2 +- markdown_it/rules_block/state_block.py | 19 +-- markdown_it/rules_block/table.py | 10 +- markdown_it/rules_core/replacements.py | 2 +- markdown_it/rules_core/state_core.py | 6 +- markdown_it/rules_inline/balance_pairs.py | 10 +- markdown_it/rules_inline/emphasis.py | 7 +- markdown_it/rules_inline/entity.py | 2 +- markdown_it/rules_inline/escape.py | 2 +- markdown_it/rules_inline/html_inline.py | 4 +- markdown_it/rules_inline/image.py | 2 +- markdown_it/rules_inline/link.py | 2 +- markdown_it/rules_inline/newline.py | 2 +- markdown_it/rules_inline/state_inline.py | 18 +-- markdown_it/rules_inline/strikethrough.py | 6 +- markdown_it/rules_inline/text.py | 5 +- markdown_it/rules_inline/text_collapse.py | 2 +- markdown_it/token.py | 10 +- markdown_it/tree.py | 18 ++- markdown_it/utils.py | 138 ++++++++++++------ pyproject.toml | 9 +- tests/test_api/test_main.py | 14 +- tests/test_api/test_token.py | 2 +- tests/test_linkify.py | 1 + tests/test_port/test_references.py | 2 +- tests/test_tree.py | 6 +- 52 files changed, 375 insertions(+), 230 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 139ce525..49f45ed2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -44,3 +44,9 @@ repos: hooks: - id: mypy additional_dependencies: [mdurl] + exclude: > + (?x)^( + benchmarking/.*\.py| + docs/.*\.py| + scripts/.*\.py| + )$ diff --git a/docs/conf.py b/docs/conf.py index 08a6e78a..e0a6e621 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -44,19 +44,20 @@ exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] nitpicky = True -nitpick_ignore = [ - ("py:class", "Match"), - ("py:class", "Path"), - ("py:class", "x in the interval [0, 1)."), - ("py:class", "markdown_it.helpers.parse_link_destination._Result"), - ("py:class", "markdown_it.helpers.parse_link_title._Result"), - ("py:class", "MarkdownIt"), - ("py:class", "RuleFunc"), - ("py:class", "_NodeType"), - ("py:class", "typing_extensions.Protocol"), +nitpick_ignore_regex = [ + ("py:.*", name) + for name in ( + "_ItemTV", + ".*_NodeType", + ".*Literal.*", + ".*_Result", + "EnvType", + "RuleFunc", + "Path", + "Ellipsis", + ) ] - # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. 
See the documentation for diff --git a/markdown_it/_punycode.py b/markdown_it/_punycode.py index 9ad24421..f9baad27 100644 --- a/markdown_it/_punycode.py +++ b/markdown_it/_punycode.py @@ -22,6 +22,7 @@ import codecs import re +from typing import Callable REGEX_SEPARATORS = re.compile(r"[\x2E\u3002\uFF0E\uFF61]") REGEX_NON_ASCII = re.compile(r"[^\0-\x7E]") @@ -32,10 +33,10 @@ def encode(uni: str) -> str: def decode(ascii: str) -> str: - return codecs.decode(ascii, encoding="punycode") # type: ignore[call-overload] + return codecs.decode(ascii, encoding="punycode") # type: ignore -def map_domain(string, fn): +def map_domain(string: str, fn: Callable[[str], str]) -> str: parts = string.split("@") result = "" if len(parts) > 1: diff --git a/markdown_it/common/normalize_url.py b/markdown_it/common/normalize_url.py index afec9284..a4ebbaae 100644 --- a/markdown_it/common/normalize_url.py +++ b/markdown_it/common/normalize_url.py @@ -68,7 +68,7 @@ def normalizeLinkText(url: str) -> str: GOOD_DATA_RE = re.compile(r"^data:image\/(gif|png|jpeg|webp);") -def validateLink(url: str, validator: Callable | None = None) -> bool: +def validateLink(url: str, validator: Callable[[str], bool] | None = None) -> bool: """Validate URL link is allowed in output. This validator can prohibit more than really needed to prevent XSS. diff --git a/markdown_it/common/utils.py b/markdown_it/common/utils.py index 9b7c4aeb..ed862e74 100644 --- a/markdown_it/common/utils.py +++ b/markdown_it/common/utils.py @@ -1,8 +1,10 @@ """Utilities for parsing source text """ +from __future__ import annotations + import html import re -from typing import Any +from typing import Any, Match, TypeVar from .entities import entities @@ -22,29 +24,12 @@ def charCodeAt(src: str, pos: int) -> Any: return None -# Merge objects -# -def assign(obj): - """Merge objects /*from1, from2, from3, ...*/)""" - raise NotImplementedError - # sources = Array.prototype.slice.call(arguments, 1) - - # sources.forEach(function (source) { - # if (!source) { return; } - - # if (typeof source !== 'object') { - # throw new TypeError(source + 'must be object') - # } - - # Object.keys(source).forEach(function (key) { - # obj[key] = source[key] - # }) - # }) - - # return obj +_ItemTV = TypeVar("_ItemTV") -def arrayReplaceAt(src: list, pos: int, newElements: list) -> list: +def arrayReplaceAt( + src: list[_ItemTV], pos: int, newElements: list[_ItemTV] +) -> list[_ItemTV]: """ Remove element from array and put another array at those position. 
Useful for some operations with tokens @@ -133,7 +118,7 @@ def unescapeMd(string: str) -> str: def unescapeAll(string: str) -> str: - def replacer_func(match): + def replacer_func(match: Match[str]) -> str: escaped = match.group(1) if escaped: return escaped diff --git a/markdown_it/helpers/parse_link_destination.py b/markdown_it/helpers/parse_link_destination.py index 58b76f3c..d527ce0c 100644 --- a/markdown_it/helpers/parse_link_destination.py +++ b/markdown_it/helpers/parse_link_destination.py @@ -8,7 +8,7 @@ class _Result: __slots__ = ("ok", "pos", "lines", "str") - def __init__(self): + def __init__(self) -> None: self.ok = False self.pos = 0 self.lines = 0 diff --git a/markdown_it/helpers/parse_link_title.py b/markdown_it/helpers/parse_link_title.py index 842c83bc..8f589336 100644 --- a/markdown_it/helpers/parse_link_title.py +++ b/markdown_it/helpers/parse_link_title.py @@ -6,13 +6,13 @@ class _Result: __slots__ = ("ok", "pos", "lines", "str") - def __init__(self): + def __init__(self) -> None: self.ok = False self.pos = 0 self.lines = 0 self.str = "" - def __str__(self): + def __str__(self) -> str: return self.str diff --git a/markdown_it/main.py b/markdown_it/main.py index 7faac5ad..acf8d079 100644 --- a/markdown_it/main.py +++ b/markdown_it/main.py @@ -2,7 +2,7 @@ from collections.abc import Callable, Generator, Iterable, Mapping, MutableMapping from contextlib import contextmanager -from typing import Any +from typing import Any, Literal, overload from . import helpers, presets # noqa F401 from .common import normalize_url, utils # noqa F401 @@ -12,7 +12,7 @@ from .renderer import RendererHTML, RendererProtocol from .rules_core.state_core import StateCore from .token import Token -from .utils import OptionsDict +from .utils import EnvType, OptionsDict, OptionsType, PresetType try: import linkify_it @@ -20,7 +20,7 @@ linkify_it = None -_PRESETS = { +_PRESETS: dict[str, PresetType] = { "default": presets.default.make(), "js-default": presets.js_default.make(), "zero": presets.zero.make(), @@ -32,8 +32,8 @@ class MarkdownIt: def __init__( self, - config: str | Mapping = "commonmark", - options_update: Mapping | None = None, + config: str | PresetType = "commonmark", + options_update: Mapping[str, Any] | None = None, *, renderer_cls: Callable[[MarkdownIt], RendererProtocol] = RendererHTML, ): @@ -67,6 +67,26 @@ def __init__( def __repr__(self) -> str: return f"{self.__class__.__module__}.{self.__class__.__name__}()" + @overload + def __getitem__(self, name: Literal["inline"]) -> ParserInline: + ... + + @overload + def __getitem__(self, name: Literal["block"]) -> ParserBlock: + ... + + @overload + def __getitem__(self, name: Literal["core"]) -> ParserCore: + ... + + @overload + def __getitem__(self, name: Literal["renderer"]) -> RendererProtocol: + ... + + @overload + def __getitem__(self, name: str) -> Any: + ... + def __getitem__(self, name: str) -> Any: return { "inline": self.inline, @@ -75,7 +95,7 @@ def __getitem__(self, name: str) -> Any: "renderer": self.renderer, }[name] - def set(self, options: MutableMapping) -> None: + def set(self, options: OptionsType) -> None: """Set parser options (in the same format as in constructor). Probably, you will never need it, but you can change options after constructor call. 
@@ -86,7 +106,7 @@ def set(self, options: MutableMapping) -> None: self.options = OptionsDict(options) def configure( - self, presets: str | Mapping, options_update: Mapping | None = None + self, presets: str | PresetType, options_update: Mapping[str, Any] | None = None ) -> MarkdownIt: """Batch load of all options and component settings. This is an internal method, and you probably will not need it. @@ -108,9 +128,9 @@ def configure( options = config.get("options", {}) or {} if options_update: - options = {**options, **options_update} + options = {**options, **options_update} # type: ignore - self.set(options) + self.set(options) # type: ignore if "components" in config: for name, component in config["components"].items(): @@ -206,7 +226,9 @@ def reset_rules(self) -> Generator[None, None, None]: self[chain].ruler.enableOnly(rules) self.inline.ruler2.enableOnly(chain_rules["inline2"]) - def add_render_rule(self, name: str, function: Callable, fmt: str = "html") -> None: + def add_render_rule( + self, name: str, function: Callable[..., Any], fmt: str = "html" + ) -> None: """Add a rule for rendering a particular Token type. Only applied when ``renderer.__output__ == fmt`` @@ -214,7 +236,9 @@ def add_render_rule(self, name: str, function: Callable, fmt: str = "html") -> N if self.renderer.__output__ == fmt: self.renderer.rules[name] = function.__get__(self.renderer) # type: ignore - def use(self, plugin: Callable, *params, **options) -> MarkdownIt: + def use( + self, plugin: Callable[..., None], *params: Any, **options: Any + ) -> MarkdownIt: """Load specified plugin with given params into current parser instance. (chainable) It's just a sugar to call `plugin(md, params)` with curring. @@ -229,7 +253,7 @@ def func(tokens, idx): plugin(self, *params, **options) return self - def parse(self, src: str, env: MutableMapping | None = None) -> list[Token]: + def parse(self, src: str, env: EnvType | None = None) -> list[Token]: """Parse the source string to a token stream :param src: source string @@ -252,7 +276,7 @@ def parse(self, src: str, env: MutableMapping | None = None) -> list[Token]: self.core.process(state) return state.tokens - def render(self, src: str, env: MutableMapping | None = None) -> Any: + def render(self, src: str, env: EnvType | None = None) -> Any: """Render markdown string into html. It does all magic for you :). :param src: source string @@ -266,7 +290,7 @@ def render(self, src: str, env: MutableMapping | None = None) -> Any: env = {} if env is None else env return self.renderer.render(self.parse(src, env), self.options, env) - def parseInline(self, src: str, env: MutableMapping | None = None) -> list[Token]: + def parseInline(self, src: str, env: EnvType | None = None) -> list[Token]: """The same as [[MarkdownIt.parse]] but skip all block rules. :param src: source string @@ -286,7 +310,7 @@ def parseInline(self, src: str, env: MutableMapping | None = None) -> list[Token self.core.process(state) return state.tokens - def renderInline(self, src: str, env: MutableMapping | None = None) -> Any: + def renderInline(self, src: str, env: EnvType | None = None) -> Any: """Similar to [[MarkdownIt.render]] but for single paragraph content. :param src: source string diff --git a/markdown_it/parser_block.py b/markdown_it/parser_block.py index f331ec54..cd240a8a 100644 --- a/markdown_it/parser_block.py +++ b/markdown_it/parser_block.py @@ -2,20 +2,25 @@ from __future__ import annotations import logging +from typing import TYPE_CHECKING, Any from . 
import rules_block from .ruler import Ruler from .rules_block.state_block import StateBlock from .token import Token +from .utils import EnvType + +if TYPE_CHECKING: + from markdown_it import MarkdownIt LOGGER = logging.getLogger(__name__) -_rules: list[tuple] = [ +_rules: list[tuple[str, Any, list[str]]] = [ # First 2 params - rule name & source. Secondary array - list of rules, # which can be terminated by this one. ("table", rules_block.table, ["paragraph", "reference"]), - ("code", rules_block.code), + ("code", rules_block.code, []), ("fence", rules_block.fence, ["paragraph", "reference", "blockquote", "list"]), ( "blockquote", @@ -24,11 +29,11 @@ ), ("hr", rules_block.hr, ["paragraph", "reference", "blockquote", "list"]), ("list", rules_block.list_block, ["paragraph", "reference", "blockquote"]), - ("reference", rules_block.reference), + ("reference", rules_block.reference, []), ("html_block", rules_block.html_block, ["paragraph", "reference", "blockquote"]), ("heading", rules_block.heading, ["paragraph", "reference", "blockquote"]), - ("lheading", rules_block.lheading), - ("paragraph", rules_block.paragraph), + ("lheading", rules_block.lheading, []), + ("paragraph", rules_block.paragraph, []), ] @@ -39,12 +44,10 @@ class ParserBlock: [[Ruler]] instance. Keep configuration of block rules. """ - def __init__(self): + def __init__(self) -> None: self.ruler = Ruler() - for data in _rules: - name = data[0] - rule = data[1] - self.ruler.push(name, rule, {"alt": data[2] if len(data) > 2 else []}) + for name, rule, alt in _rules: + self.ruler.push(name, rule, {"alt": alt}) def tokenize( self, state: StateBlock, startLine: int, endLine: int, silent: bool = False @@ -96,8 +99,8 @@ def tokenize( def parse( self, src: str, - md, - env, + md: MarkdownIt, + env: EnvType, outTokens: list[Token], ords: tuple[int, ...] | None = None, ) -> list[Token] | None: diff --git a/markdown_it/parser_core.py b/markdown_it/parser_core.py index 32209b32..251b7634 100644 --- a/markdown_it/parser_core.py +++ b/markdown_it/parser_core.py @@ -21,7 +21,7 @@ class ParserCore: - def __init__(self): + def __init__(self) -> None: self.ruler = Ruler() for name, rule in _rules: self.ruler.push(name, rule) diff --git a/markdown_it/parser_inline.py b/markdown_it/parser_inline.py index b61c990b..a8228524 100644 --- a/markdown_it/parser_inline.py +++ b/markdown_it/parser_inline.py @@ -2,10 +2,16 @@ """ from __future__ import annotations +from typing import TYPE_CHECKING + from . 
import rules_inline from .ruler import RuleFunc, Ruler from .rules_inline.state_inline import StateInline from .token import Token +from .utils import EnvType + +if TYPE_CHECKING: + from markdown_it import MarkdownIt # Parser rules _rules: list[tuple[str, RuleFunc]] = [ @@ -31,7 +37,7 @@ class ParserInline: - def __init__(self): + def __init__(self) -> None: self.ruler = Ruler() for name, rule in _rules: self.ruler.push(name, rule) @@ -114,7 +120,9 @@ def tokenize(self, state: StateInline) -> None: if state.pending: state.pushPending() - def parse(self, src: str, md, env, tokens: list[Token]) -> list[Token]: + def parse( + self, src: str, md: MarkdownIt, env: EnvType, tokens: list[Token] + ) -> list[Token]: """Process input string and push inline tokens into `tokens`""" state = StateInline(src, md, env, tokens) self.tokenize(state) diff --git a/markdown_it/presets/__init__.py b/markdown_it/presets/__init__.py index 16f10e51..22cf74cb 100644 --- a/markdown_it/presets/__init__.py +++ b/markdown_it/presets/__init__.py @@ -1,6 +1,7 @@ __all__ = ("commonmark", "default", "zero", "js_default", "gfm_like") from . import commonmark, default, zero +from ..utils import PresetType js_default = default @@ -16,7 +17,7 @@ class gfm_like: """ @staticmethod - def make(): + def make() -> PresetType: config = commonmark.make() config["components"]["core"]["rules"].append("linkify") config["components"]["block"]["rules"].append("table") diff --git a/markdown_it/presets/commonmark.py b/markdown_it/presets/commonmark.py index e44b66bb..60a39250 100644 --- a/markdown_it/presets/commonmark.py +++ b/markdown_it/presets/commonmark.py @@ -6,9 +6,10 @@ - block: table - inline: strikethrough """ +from ..utils import PresetType -def make(): +def make() -> PresetType: return { "options": { "maxNesting": 20, # Internal protection, recursion limit diff --git a/markdown_it/presets/default.py b/markdown_it/presets/default.py index 59f4855e..c9ab902d 100644 --- a/markdown_it/presets/default.py +++ b/markdown_it/presets/default.py @@ -1,7 +1,8 @@ """markdown-it default options.""" +from ..utils import PresetType -def make(): +def make() -> PresetType: return { "options": { "maxNesting": 100, # Internal protection, recursion limit diff --git a/markdown_it/presets/zero.py b/markdown_it/presets/zero.py index af1d9c7f..fcc5eb3a 100644 --- a/markdown_it/presets/zero.py +++ b/markdown_it/presets/zero.py @@ -2,9 +2,10 @@ "Zero" preset, with nothing enabled. Useful for manual configuring of simple modes. For example, to parse bold/italic only. """ +from ..utils import PresetType -def make(): +def make() -> PresetType: return { "options": { "maxNesting": 20, # Internal protection, recursion limit diff --git a/markdown_it/renderer.py b/markdown_it/renderer.py index 2d784826..4cddbc67 100644 --- a/markdown_it/renderer.py +++ b/markdown_it/renderer.py @@ -7,20 +7,20 @@ class Renderer """ from __future__ import annotations -from collections.abc import MutableMapping, Sequence +from collections.abc import Sequence import inspect from typing import Any, ClassVar, Protocol from .common.utils import escapeHtml, unescapeAll from .token import Token -from .utils import OptionsDict +from .utils import EnvType, OptionsDict class RendererProtocol(Protocol): __output__: ClassVar[str] def render( - self, tokens: Sequence[Token], options: OptionsDict, env: MutableMapping + self, tokens: Sequence[Token], options: OptionsDict, env: EnvType ) -> Any: ... 
@@ -57,7 +57,7 @@ def strong_close(self, tokens, idx, options, env): __output__ = "html" - def __init__(self, parser=None): + def __init__(self, parser: Any = None): self.rules = { k: v for k, v in inspect.getmembers(self, predicate=inspect.ismethod) @@ -65,7 +65,7 @@ def __init__(self, parser=None): } def render( - self, tokens: Sequence[Token], options: OptionsDict, env: MutableMapping + self, tokens: Sequence[Token], options: OptionsDict, env: EnvType ) -> str: """Takes token stream and generates HTML. @@ -88,7 +88,7 @@ def render( return result def renderInline( - self, tokens: Sequence[Token], options: OptionsDict, env: MutableMapping + self, tokens: Sequence[Token], options: OptionsDict, env: EnvType ) -> str: """The same as ``render``, but for single token of `inline` type. @@ -111,7 +111,7 @@ def renderToken( tokens: Sequence[Token], idx: int, options: OptionsDict, - env: MutableMapping, + env: EnvType, ) -> str: """Default token renderer. @@ -184,7 +184,7 @@ def renderInlineAsText( self, tokens: Sequence[Token] | None, options: OptionsDict, - env: MutableMapping, + env: EnvType, ) -> str: """Special kludge for image `alt` attributes to conform CommonMark spec. @@ -210,7 +210,9 @@ def renderInlineAsText( ################################################### - def code_inline(self, tokens: Sequence[Token], idx: int, options, env) -> str: + def code_inline( + self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType + ) -> str: token = tokens[idx] return ( " str: token = tokens[idx] @@ -242,7 +244,7 @@ def fence( tokens: Sequence[Token], idx: int, options: OptionsDict, - env: MutableMapping, + env: EnvType, ) -> str: token = tokens[idx] info = unescapeAll(token.info).strip() if token.info else "" @@ -294,7 +296,7 @@ def image( tokens: Sequence[Token], idx: int, options: OptionsDict, - env: MutableMapping, + env: EnvType, ) -> str: token = tokens[idx] @@ -308,22 +310,28 @@ def image( return self.renderToken(tokens, idx, options, env) def hardbreak( - self, tokens: Sequence[Token], idx: int, options: OptionsDict, *args + self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType ) -> str: return "
<br />\n" if options.xhtmlOut else "<br>\n" def softbreak( - self, tokens: Sequence[Token], idx: int, options: OptionsDict, *args + self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType ) -> str: return ( ("<br />\n" if options.xhtmlOut else "<br>
\n") if options.breaks else "\n" ) - def text(self, tokens: Sequence[Token], idx: int, *args) -> str: + def text( + self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType + ) -> str: return escapeHtml(tokens[idx].content) - def html_block(self, tokens: Sequence[Token], idx: int, *args) -> str: + def html_block( + self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType + ) -> str: return tokens[idx].content - def html_inline(self, tokens: Sequence[Token], idx: int, *args) -> str: + def html_inline( + self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType + ) -> str: return tokens[idx].content diff --git a/markdown_it/ruler.py b/markdown_it/ruler.py index 11b937a0..421666cc 100644 --- a/markdown_it/ruler.py +++ b/markdown_it/ruler.py @@ -17,12 +17,14 @@ class Ruler """ from __future__ import annotations -from collections.abc import Callable, Iterable, MutableMapping +from collections.abc import Callable, Iterable from dataclasses import dataclass, field -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, TypedDict from markdown_it._compat import DATACLASS_KWARGS +from .utils import EnvType + if TYPE_CHECKING: from markdown_it import MarkdownIt @@ -30,7 +32,7 @@ class Ruler class StateBase: srcCharCode: tuple[int, ...] - def __init__(self, src: str, md: MarkdownIt, env: MutableMapping): + def __init__(self, src: str, md: MarkdownIt, env: EnvType): self.src = src self.env = env self.md = md @@ -49,7 +51,11 @@ def src(self, value: str) -> None: # arguments may or may not exist, based on the rule's type (block, # core, inline). Return type is either `None` or `bool` based on the # rule's type. -RuleFunc = Callable +RuleFunc = Callable # type: ignore + + +class RuleOptionsType(TypedDict, total=False): + alt: list[str] @dataclass(**DATACLASS_KWARGS) @@ -61,7 +67,7 @@ class Rule: class Ruler: - def __init__(self): + def __init__(self) -> None: # List of added rules. self.__rules__: list[Rule] = [] # Cached rule chains. @@ -95,7 +101,9 @@ def __compile__(self) -> None: continue self.__cache__[chain].append(rule.fn) - def at(self, ruleName: str, fn: RuleFunc, options=None): + def at( + self, ruleName: str, fn: RuleFunc, options: RuleOptionsType | None = None + ) -> None: """Replace rule by name with new function & options. :param ruleName: rule name to replace. @@ -111,7 +119,13 @@ def at(self, ruleName: str, fn: RuleFunc, options=None): self.__rules__[index].alt = options.get("alt", []) self.__cache__ = None - def before(self, beforeName: str, ruleName: str, fn: RuleFunc, options=None): + def before( + self, + beforeName: str, + ruleName: str, + fn: RuleFunc, + options: RuleOptionsType | None = None, + ) -> None: """Add new rule to chain before one with given name. :param beforeName: new rule will be added before this one. @@ -127,7 +141,13 @@ def before(self, beforeName: str, ruleName: str, fn: RuleFunc, options=None): self.__rules__.insert(index, Rule(ruleName, True, fn, options.get("alt", []))) self.__cache__ = None - def after(self, afterName: str, ruleName: str, fn: RuleFunc, options=None): + def after( + self, + afterName: str, + ruleName: str, + fn: RuleFunc, + options: RuleOptionsType | None = None, + ) -> None: """Add new rule to chain after one with given name. :param afterName: new rule will be added after this one. 
@@ -145,7 +165,9 @@ def after(self, afterName: str, ruleName: str, fn: RuleFunc, options=None): ) self.__cache__ = None - def push(self, ruleName: str, fn: RuleFunc, options=None): + def push( + self, ruleName: str, fn: RuleFunc, options: RuleOptionsType | None = None + ) -> None: """Push new rule to the end of chain. :param ruleName: new rule will be added to the end of chain. @@ -156,7 +178,9 @@ def push(self, ruleName: str, fn: RuleFunc, options=None): self.__rules__.append(Rule(ruleName, True, fn, (options or {}).get("alt", []))) self.__cache__ = None - def enable(self, names: str | Iterable[str], ignoreInvalid: bool = False): + def enable( + self, names: str | Iterable[str], ignoreInvalid: bool = False + ) -> list[str]: """Enable rules with given names. :param names: name or list of rule names to enable. @@ -166,7 +190,7 @@ def enable(self, names: str | Iterable[str], ignoreInvalid: bool = False): """ if isinstance(names, str): names = [names] - result = [] + result: list[str] = [] for name in names: idx = self.__find__(name) if (idx < 0) and ignoreInvalid: @@ -178,7 +202,9 @@ def enable(self, names: str | Iterable[str], ignoreInvalid: bool = False): self.__cache__ = None return result - def enableOnly(self, names: str | Iterable[str], ignoreInvalid: bool = False): + def enableOnly( + self, names: str | Iterable[str], ignoreInvalid: bool = False + ) -> list[str]: """Enable rules with given names, and disable everything else. :param names: name or list of rule names to enable. @@ -190,9 +216,11 @@ def enableOnly(self, names: str | Iterable[str], ignoreInvalid: bool = False): names = [names] for rule in self.__rules__: rule.enabled = False - self.enable(names, ignoreInvalid) + return self.enable(names, ignoreInvalid) - def disable(self, names: str | Iterable[str], ignoreInvalid: bool = False): + def disable( + self, names: str | Iterable[str], ignoreInvalid: bool = False + ) -> list[str]: """Disable rules with given names. :param names: name or list of rule names to enable. 
diff --git a/markdown_it/rules_block/blockquote.py b/markdown_it/rules_block/blockquote.py index 965a9e73..3ca0321c 100644 --- a/markdown_it/rules_block/blockquote.py +++ b/markdown_it/rules_block/blockquote.py @@ -9,7 +9,7 @@ LOGGER = logging.getLogger(__name__) -def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool): +def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool: LOGGER.debug( "entering blockquote: %s, %s, %s, %s", state, startLine, endLine, silent ) diff --git a/markdown_it/rules_block/code.py b/markdown_it/rules_block/code.py index a796608d..69bd6bdc 100644 --- a/markdown_it/rules_block/code.py +++ b/markdown_it/rules_block/code.py @@ -6,7 +6,7 @@ LOGGER = logging.getLogger(__name__) -def code(state: StateBlock, startLine: int, endLine: int, silent: bool = False): +def code(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool: LOGGER.debug("entering code: %s, %s, %s, %s", state, startLine, endLine, silent) if state.sCount[startLine] - state.blkIndent < 4: diff --git a/markdown_it/rules_block/fence.py b/markdown_it/rules_block/fence.py index 53bc6f2d..2bdd95f8 100644 --- a/markdown_it/rules_block/fence.py +++ b/markdown_it/rules_block/fence.py @@ -6,7 +6,7 @@ LOGGER = logging.getLogger(__name__) -def fence(state: StateBlock, startLine: int, endLine: int, silent: bool): +def fence(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool: LOGGER.debug("entering fence: %s, %s, %s, %s", state, startLine, endLine, silent) haveEndMarker = False diff --git a/markdown_it/rules_block/heading.py b/markdown_it/rules_block/heading.py index 064d0702..564e1726 100644 --- a/markdown_it/rules_block/heading.py +++ b/markdown_it/rules_block/heading.py @@ -9,7 +9,7 @@ LOGGER = logging.getLogger(__name__) -def heading(state: StateBlock, startLine: int, endLine: int, silent: bool): +def heading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool: LOGGER.debug("entering heading: %s, %s, %s, %s", state, startLine, endLine, silent) pos = state.bMarks[startLine] + state.tShift[startLine] diff --git a/markdown_it/rules_block/hr.py b/markdown_it/rules_block/hr.py index 953bba23..72ea010d 100644 --- a/markdown_it/rules_block/hr.py +++ b/markdown_it/rules_block/hr.py @@ -10,7 +10,7 @@ LOGGER = logging.getLogger(__name__) -def hr(state: StateBlock, startLine: int, endLine: int, silent: bool): +def hr(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool: LOGGER.debug("entering hr: %s, %s, %s, %s", state, startLine, endLine, silent) pos = state.bMarks[startLine] + state.tShift[startLine] diff --git a/markdown_it/rules_block/html_block.py b/markdown_it/rules_block/html_block.py index 31afab76..4831f562 100644 --- a/markdown_it/rules_block/html_block.py +++ b/markdown_it/rules_block/html_block.py @@ -12,7 +12,7 @@ # An array of opening and corresponding closing sequences for html tags, # last argument defines whether it can terminate a paragraph or not -HTML_SEQUENCES: list[tuple[re.Pattern, re.Pattern, bool]] = [ +HTML_SEQUENCES: list[tuple[re.Pattern[str], re.Pattern[str], bool]] = [ ( re.compile(r"^<(script|pre|style|textarea)(?=(\s|>|$))", re.IGNORECASE), re.compile(r"<\/(script|pre|style|textarea)>", re.IGNORECASE), @@ -31,7 +31,7 @@ ] -def html_block(state: StateBlock, startLine: int, endLine: int, silent: bool): +def html_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool: LOGGER.debug( "entering html_block: %s, %s, %s, %s", state, startLine, endLine, 
silent ) diff --git a/markdown_it/rules_block/lheading.py b/markdown_it/rules_block/lheading.py index 92632acc..a3806f8e 100644 --- a/markdown_it/rules_block/lheading.py +++ b/markdown_it/rules_block/lheading.py @@ -7,7 +7,7 @@ LOGGER = logging.getLogger(__name__) -def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool): +def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool: LOGGER.debug("entering lheading: %s, %s, %s, %s", state, startLine, endLine, silent) level = None diff --git a/markdown_it/rules_block/list.py b/markdown_it/rules_block/list.py index d9c5e554..1592b599 100644 --- a/markdown_it/rules_block/list.py +++ b/markdown_it/rules_block/list.py @@ -9,7 +9,7 @@ # Search `[-+*][\n ]`, returns next pos after marker on success # or -1 on fail. -def skipBulletListMarker(state: StateBlock, startLine: int): +def skipBulletListMarker(state: StateBlock, startLine: int) -> int: pos = state.bMarks[startLine] + state.tShift[startLine] maximum = state.eMarks[startLine] @@ -34,7 +34,7 @@ def skipBulletListMarker(state: StateBlock, startLine: int): # Search `\d+[.)][\n ]`, returns next pos after marker on success # or -1 on fail. -def skipOrderedListMarker(state: StateBlock, startLine: int): +def skipOrderedListMarker(state: StateBlock, startLine: int) -> int: start = state.bMarks[startLine] + state.tShift[startLine] pos = start maximum = state.eMarks[startLine] @@ -83,7 +83,7 @@ def skipOrderedListMarker(state: StateBlock, startLine: int): return pos -def markTightParagraphs(state: StateBlock, idx: int): +def markTightParagraphs(state: StateBlock, idx: int) -> None: level = state.level + 2 i = idx + 2 @@ -96,7 +96,7 @@ def markTightParagraphs(state: StateBlock, idx: int): i += 1 -def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool): +def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool: LOGGER.debug("entering list: %s, %s, %s, %s", state, startLine, endLine, silent) isTerminatingParagraph = False diff --git a/markdown_it/rules_block/paragraph.py b/markdown_it/rules_block/paragraph.py index fef7edf7..3c7d43d3 100644 --- a/markdown_it/rules_block/paragraph.py +++ b/markdown_it/rules_block/paragraph.py @@ -7,7 +7,7 @@ LOGGER = logging.getLogger(__name__) -def paragraph(state: StateBlock, startLine: int, endLine: int, silent: bool = False): +def paragraph(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool: LOGGER.debug( "entering paragraph: %s, %s, %s, %s", state, startLine, endLine, silent ) diff --git a/markdown_it/rules_block/reference.py b/markdown_it/rules_block/reference.py index 39e21eb6..5689064b 100644 --- a/markdown_it/rules_block/reference.py +++ b/markdown_it/rules_block/reference.py @@ -6,7 +6,7 @@ LOGGER = logging.getLogger(__name__) -def reference(state: StateBlock, startLine, _endLine, silent): +def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) -> bool: LOGGER.debug( "entering reference: %s, %s, %s, %s", state, startLine, _endLine, silent ) diff --git a/markdown_it/rules_block/state_block.py b/markdown_it/rules_block/state_block.py index c5589149..7ddf806c 100644 --- a/markdown_it/rules_block/state_block.py +++ b/markdown_it/rules_block/state_block.py @@ -1,10 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Literal from ..common.utils import isSpace from ..ruler import StateBase from ..token import Token +from ..utils import EnvType if TYPE_CHECKING: from 
markdown_it.main import MarkdownIt @@ -15,7 +16,7 @@ def __init__( self, src: str, md: MarkdownIt, - env, + env: EnvType, tokens: list[Token], srcCharCode: tuple[int, ...] | None = None, ): @@ -36,11 +37,11 @@ def __init__( self.tokens = tokens - self.bMarks = [] # line begin offsets for fast jumps - self.eMarks = [] # line end offsets for fast jumps + self.bMarks: list[int] = [] # line begin offsets for fast jumps + self.eMarks: list[int] = [] # line end offsets for fast jumps # offsets of the first non-space characters (tabs not expanded) - self.tShift = [] - self.sCount = [] # indents for each line (tabs expanded) + self.tShift: list[int] = [] + self.sCount: list[int] = [] # indents for each line (tabs expanded) # An amount of virtual spaces (tabs expanded) between beginning # of each line (bMarks) and real beginning of that line. @@ -52,7 +53,7 @@ def __init__( # an initial tab length, e.g. bsCount=21 applied to string `\t123` # means first tab should be expanded to 4-21%4 === 3 spaces. # - self.bsCount = [] + self.bsCount: list[int] = [] # block parser variables self.blkIndent = 0 # required block content indent (for example, if we are @@ -115,13 +116,13 @@ def __init__( self.lineMax = len(self.bMarks) - 1 # don't count last fake line - def __repr__(self): + def __repr__(self) -> str: return ( f"{self.__class__.__name__}" f"(line={self.line},level={self.level},tokens={len(self.tokens)})" ) - def push(self, ttype: str, tag: str, nesting: int) -> Token: + def push(self, ttype: str, tag: str, nesting: Literal[-1, 0, 1]) -> Token: """Push new token to "stream".""" token = Token(ttype, tag, nesting) token.block = True diff --git a/markdown_it/rules_block/table.py b/markdown_it/rules_block/table.py index e3db8584..c432d44f 100644 --- a/markdown_it/rules_block/table.py +++ b/markdown_it/rules_block/table.py @@ -1,4 +1,6 @@ # GFM table, https://github.github.com/gfm/#tables-extension- +from __future__ import annotations + import re from ..common.utils import charCodeAt, isSpace @@ -8,7 +10,7 @@ enclosingPipesRe = re.compile(r"^\||\|$") -def getLine(state: StateBlock, line: int): +def getLine(state: StateBlock, line: int) -> str: pos = state.bMarks[line] + state.tShift[line] maximum = state.eMarks[line] @@ -16,8 +18,8 @@ def getLine(state: StateBlock, line: int): return state.src[pos:maximum] -def escapedSplit(string): - result = [] +def escapedSplit(string: str) -> list[str]: + result: list[str] = [] pos = 0 max = len(string) isEscaped = False @@ -47,7 +49,7 @@ def escapedSplit(string): return result -def table(state: StateBlock, startLine: int, endLine: int, silent: bool): +def table(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool: tbodyLines = None # should have at least two lines diff --git a/markdown_it/rules_core/replacements.py b/markdown_it/rules_core/replacements.py index 5e9b7ae7..e5d81c7a 100644 --- a/markdown_it/rules_core/replacements.py +++ b/markdown_it/rules_core/replacements.py @@ -56,7 +56,7 @@ SCOPED_ABBR = {"c": "©", "r": "®", "p": "§", "tm": "™"} -def replaceFn(match: re.Match[str]): +def replaceFn(match: re.Match[str]) -> str: return SCOPED_ABBR[match.group(1).lower()] diff --git a/markdown_it/rules_core/state_core.py b/markdown_it/rules_core/state_core.py index 15b7c605..a938041d 100644 --- a/markdown_it/rules_core/state_core.py +++ b/markdown_it/rules_core/state_core.py @@ -1,10 +1,10 @@ from __future__ import annotations -from collections.abc import MutableMapping from typing import TYPE_CHECKING from ..ruler import StateBase from ..token import 
Token +from ..utils import EnvType if TYPE_CHECKING: from markdown_it import MarkdownIt @@ -15,9 +15,9 @@ def __init__( self, src: str, md: MarkdownIt, - env: MutableMapping, + env: EnvType, tokens: list[Token] | None = None, - ): + ) -> None: self.src = src self.md = md # link to parser instance self.env = env diff --git a/markdown_it/rules_inline/balance_pairs.py b/markdown_it/rules_inline/balance_pairs.py index 5423b5d6..ce0a0884 100644 --- a/markdown_it/rules_inline/balance_pairs.py +++ b/markdown_it/rules_inline/balance_pairs.py @@ -1,9 +1,11 @@ -# For each opening emphasis-like marker find a matching closing one -# -from .state_inline import StateInline +"""Balance paired characters (*, _, etc) in inline tokens.""" +from __future__ import annotations +from .state_inline import Delimiter, StateInline -def processDelimiters(state: StateInline, delimiters, *args): + +def processDelimiters(state: StateInline, delimiters: list[Delimiter]) -> None: + """For each opening emphasis-like marker find a matching closing one.""" openersBottom = {} maximum = len(delimiters) diff --git a/markdown_it/rules_inline/emphasis.py b/markdown_it/rules_inline/emphasis.py index 5262430b..d21b494c 100644 --- a/markdown_it/rules_inline/emphasis.py +++ b/markdown_it/rules_inline/emphasis.py @@ -1,10 +1,11 @@ # Process *this* and _that_ # +from __future__ import annotations from .state_inline import Delimiter, StateInline -def tokenize(state: StateInline, silent: bool): +def tokenize(state: StateInline, silent: bool) -> bool: """Insert each marker as a separate text token, and add it to delimiter list""" start = state.pos marker = state.srcCharCode[start] @@ -38,7 +39,7 @@ def tokenize(state: StateInline, silent: bool): return True -def _postProcess(state, delimiters): +def _postProcess(state: StateInline, delimiters: list[Delimiter]) -> None: i = len(delimiters) - 1 while i >= 0: startDelim = delimiters[i] @@ -92,7 +93,7 @@ def _postProcess(state, delimiters): i -= 1 -def postProcess(state: StateInline): +def postProcess(state: StateInline) -> None: """Walk through delimiter list and replace text tokens with tags.""" _postProcess(state, state.delimiters) diff --git a/markdown_it/rules_inline/entity.py b/markdown_it/rules_inline/entity.py index 08d271ed..9c4c6a0e 100644 --- a/markdown_it/rules_inline/entity.py +++ b/markdown_it/rules_inline/entity.py @@ -9,7 +9,7 @@ NAMED_RE = re.compile(r"^&([a-z][a-z0-9]{1,31});", re.IGNORECASE) -def entity(state: StateInline, silent: bool): +def entity(state: StateInline, silent: bool) -> bool: pos = state.pos maximum = state.posMax diff --git a/markdown_it/rules_inline/escape.py b/markdown_it/rules_inline/escape.py index 36bd0402..1767e01d 100644 --- a/markdown_it/rules_inline/escape.py +++ b/markdown_it/rules_inline/escape.py @@ -9,7 +9,7 @@ ESCAPED[ord(ch)] = 1 -def escape(state: StateInline, silent: bool): +def escape(state: StateInline, silent: bool) -> bool: pos = state.pos maximum = state.posMax diff --git a/markdown_it/rules_inline/html_inline.py b/markdown_it/rules_inline/html_inline.py index b875e884..6a636684 100644 --- a/markdown_it/rules_inline/html_inline.py +++ b/markdown_it/rules_inline/html_inline.py @@ -3,13 +3,13 @@ from .state_inline import StateInline -def isLetter(ch: int): +def isLetter(ch: int) -> bool: lc = ch | 0x20 # to lower case # /* a */ and /* z */ return (lc >= 0x61) and (lc <= 0x7A) -def html_inline(state: StateInline, silent: bool): +def html_inline(state: StateInline, silent: bool) -> bool: pos = state.pos if not state.md.options.get("html", 
None): diff --git a/markdown_it/rules_inline/image.py b/markdown_it/rules_inline/image.py index d7215bdf..0cb14ffd 100644 --- a/markdown_it/rules_inline/image.py +++ b/markdown_it/rules_inline/image.py @@ -6,7 +6,7 @@ from .state_inline import StateInline -def image(state: StateInline, silent: bool): +def image(state: StateInline, silent: bool) -> bool: label = None href = "" oldPos = state.pos diff --git a/markdown_it/rules_inline/link.py b/markdown_it/rules_inline/link.py index a6345152..c4548ccd 100644 --- a/markdown_it/rules_inline/link.py +++ b/markdown_it/rules_inline/link.py @@ -4,7 +4,7 @@ from .state_inline import StateInline -def link(state: StateInline, silent: bool): +def link(state: StateInline, silent: bool) -> bool: href = "" title = "" label = None diff --git a/markdown_it/rules_inline/newline.py b/markdown_it/rules_inline/newline.py index 3034e408..4c440579 100644 --- a/markdown_it/rules_inline/newline.py +++ b/markdown_it/rules_inline/newline.py @@ -7,7 +7,7 @@ endSpace = re.compile(r" +$") -def newline(state: StateInline, silent: bool): +def newline(state: StateInline, silent: bool) -> bool: pos = state.pos # /* \n */ diff --git a/markdown_it/rules_inline/state_inline.py b/markdown_it/rules_inline/state_inline.py index 283532cc..7c1cb1f3 100644 --- a/markdown_it/rules_inline/state_inline.py +++ b/markdown_it/rules_inline/state_inline.py @@ -1,14 +1,14 @@ from __future__ import annotations from collections import namedtuple -from collections.abc import MutableMapping from dataclasses import dataclass -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Literal from .._compat import DATACLASS_KWARGS from ..common.utils import isMdAsciiPunct, isPunctChar, isWhiteSpace from ..ruler import StateBase from ..token import Token +from ..utils import EnvType if TYPE_CHECKING: from markdown_it import MarkdownIt @@ -50,13 +50,13 @@ class Delimiter: class StateInline(StateBase): def __init__( - self, src: str, md: MarkdownIt, env: MutableMapping, outTokens: list[Token] - ): + self, src: str, md: MarkdownIt, env: EnvType, outTokens: list[Token] + ) -> None: self.src = src self.env = env self.md = md self.tokens = outTokens - self.tokens_meta: list[dict | None] = [None] * len(outTokens) + self.tokens_meta: list[dict[str, Any] | None] = [None] * len(outTokens) self.pos = 0 self.posMax = len(self.src) @@ -78,13 +78,13 @@ def __init__( self.backticks: dict[int, int] = {} self.backticksScanned = False - def __repr__(self): + def __repr__(self) -> str: return ( f"{self.__class__.__name__}" f"(pos=[{self.pos} of {self.posMax}], token={len(self.tokens)})" ) - def pushPending(self): + def pushPending(self) -> Token: token = Token("text", "", 0) token.content = self.pending token.level = self.pendingLevel @@ -92,7 +92,7 @@ def pushPending(self): self.pending = "" return token - def push(self, ttype, tag, nesting): + def push(self, ttype: str, tag: str, nesting: Literal[-1, 0, 1]) -> Token: """Push new token to "stream". If pending text exists - flush it as text token """ @@ -121,7 +121,7 @@ def push(self, ttype, tag, nesting): self.tokens_meta.append(token_meta) return token - def scanDelims(self, start, canSplitWord): + def scanDelims(self, start: int, canSplitWord: bool) -> Scanned: """ Scan a sequence of emphasis-like markers, and determine whether it can start an emphasis sequence or end an emphasis sequence. 
diff --git a/markdown_it/rules_inline/strikethrough.py b/markdown_it/rules_inline/strikethrough.py index 9b062a66..8b080816 100644 --- a/markdown_it/rules_inline/strikethrough.py +++ b/markdown_it/rules_inline/strikethrough.py @@ -4,7 +4,7 @@ from .state_inline import Delimiter, StateInline -def tokenize(state: StateInline, silent: bool): +def tokenize(state: StateInline, silent: bool) -> bool: """Insert each marker as a separate text token, and add it to delimiter list""" start = state.pos marker = state.srcCharCode[start] @@ -52,7 +52,7 @@ def tokenize(state: StateInline, silent: bool): return True -def _postProcess(state: StateInline, delimiters: list[Delimiter]): +def _postProcess(state: StateInline, delimiters: list[Delimiter]) -> None: loneMarkers = [] maximum = len(delimiters) @@ -113,7 +113,7 @@ def _postProcess(state: StateInline, delimiters: list[Delimiter]): state.tokens[i] = token -def postProcess(state: StateInline): +def postProcess(state: StateInline) -> None: """Walk through delimiter list and replace text tokens with tags.""" tokens_meta = state.tokens_meta maximum = len(state.tokens_meta) diff --git a/markdown_it/rules_inline/text.py b/markdown_it/rules_inline/text.py index ec6ee0fa..bdf55310 100644 --- a/markdown_it/rules_inline/text.py +++ b/markdown_it/rules_inline/text.py @@ -1,5 +1,6 @@ # Skip text characters for text token, place those to pending buffer # and increment current pos +from typing import Any from .state_inline import StateInline @@ -12,7 +13,7 @@ # http://spec.commonmark.org/0.15/#ascii-punctuation-character -def isTerminatorChar(ch): +def isTerminatorChar(ch: int) -> bool: return ch in { 0x0A, # /* \n */: 0x21, # /* ! */: @@ -40,7 +41,7 @@ def isTerminatorChar(ch): } -def text(state: StateInline, silent: bool, **args): +def text(state: StateInline, silent: bool, **args: Any) -> bool: pos = state.pos posMax = state.posMax while (pos < posMax) and not isTerminatorChar(state.srcCharCode[pos]): diff --git a/markdown_it/rules_inline/text_collapse.py b/markdown_it/rules_inline/text_collapse.py index 6d0c0ab6..e09289cf 100644 --- a/markdown_it/rules_inline/text_collapse.py +++ b/markdown_it/rules_inline/text_collapse.py @@ -1,7 +1,7 @@ from .state_inline import StateInline -def text_collapse(state: StateInline, *args): +def text_collapse(state: StateInline) -> None: """ Clean up tokens after emphasis and strikethrough postprocessing: merge adjacent text nodes into one and re-calculate all token levels diff --git a/markdown_it/token.py b/markdown_it/token.py index 7a41a784..e3f6c9b9 100644 --- a/markdown_it/token.py +++ b/markdown_it/token.py @@ -2,7 +2,7 @@ from collections.abc import Callable, MutableMapping import dataclasses as dc -from typing import Any +from typing import Any, Literal import warnings from markdown_it._compat import DATACLASS_KWARGS @@ -28,7 +28,7 @@ class Token: tag: str """HTML tag name, e.g. 'p'""" - nesting: int + nesting: Literal[-1, 0, 1] """Level change (number in {-1, 0, 1} set), where: - `1` means the tag is opening - `0` means the tag is self-closing @@ -63,7 +63,7 @@ class Token: - The string value of the item marker for ordered-list "list_item_open" tokens """ - meta: dict = dc.field(default_factory=dict) + meta: dict[Any, Any] = dc.field(default_factory=dict) """A place for plugins to store any arbitrary data""" block: bool = False @@ -76,7 +76,7 @@ class Token: Used for tight lists to hide paragraphs. 
""" - def __post_init__(self): + def __post_init__(self) -> None: self.attrs = convert_attrs(self.attrs) def attrIndex(self, name: str) -> int: @@ -129,7 +129,7 @@ def as_dict( *, children: bool = True, as_upstream: bool = True, - meta_serializer: Callable[[dict], Any] | None = None, + meta_serializer: Callable[[dict[Any, Any]], Any] | None = None, filter: Callable[[str, Any], bool] | None = None, dict_factory: Callable[..., MutableMapping[str, Any]] = dict, ) -> MutableMapping[str, Any]: diff --git a/markdown_it/tree.py b/markdown_it/tree.py index 09476b22..a39ba32a 100644 --- a/markdown_it/tree.py +++ b/markdown_it/tree.py @@ -9,7 +9,6 @@ from typing import Any, NamedTuple, TypeVar, overload from .token import Token -from .utils import _removesuffix class _NesterTokens(NamedTuple): @@ -51,7 +50,7 @@ def __init__( # Empty list unless a non-empty container, or unnested token that has # children (i.e. inline or img) - self._children: list = [] + self._children: list[Any] = [] if create_root: self._set_children_from_tokens(tokens) @@ -119,7 +118,7 @@ def children(self: _NodeType, value: list[_NodeType]) -> None: @property def parent(self: _NodeType) -> _NodeType | None: - return self._parent + return self._parent # type: ignore @parent.setter def parent(self: _NodeType, value: _NodeType | None) -> None: @@ -314,7 +313,7 @@ def info(self) -> str: return self._attribute_token().info @property - def meta(self) -> dict: + def meta(self) -> dict[Any, Any]: """A place for plugins to store an arbitrary data.""" return self._attribute_token().meta @@ -328,3 +327,14 @@ def hidden(self) -> bool: """If it's true, ignore this element when rendering. Used for tight lists to hide paragraphs.""" return self._attribute_token().hidden + + +def _removesuffix(string: str, suffix: str) -> str: + """Remove a suffix from a string. + + Replace this with str.removesuffix() from stdlib when minimum Python + version is 3.9. + """ + if suffix and string.endswith(suffix): + return string[: -len(suffix)] + return string diff --git a/markdown_it/utils.py b/markdown_it/utils.py index 2ba2995a..a9793720 100644 --- a/markdown_it/utils.py +++ b/markdown_it/utils.py @@ -1,95 +1,160 @@ from __future__ import annotations -from collections.abc import Callable +from collections.abc import MutableMapping as MutableMappingABC from pathlib import Path +from typing import Any, Callable, Iterable, MutableMapping, TypedDict, cast + +EnvType = MutableMapping[str, Any] # note: could use TypeAlias in python 3.10 +"""Type for the environment sandbox used in parsing and rendering, +which stores mutable variables for use by plugins and rules. +""" + + +class OptionsType(TypedDict): + """Options for parsing.""" + + maxNesting: int + """Internal protection, recursion limit.""" + html: bool + """Enable HTML tags in source.""" + linkify: bool + """Enable autoconversion of URL-like texts to links.""" + typographer: bool + """Enable smartquotes and replacements.""" + quotes: str + """Quote characters.""" + xhtmlOut: bool + """Use '/' to close single tags (
).""" + breaks: bool + """Convert newlines in paragraphs into
.""" + langPrefix: str + """CSS language prefix for fenced blocks.""" + highlight: Callable[[str, str, str], str] | None + """Highlighter function: (content, lang, attrs) -> str.""" + + +class PresetType(TypedDict): + """Preset configuration for markdown-it.""" + + options: OptionsType + """Options for parsing.""" + components: MutableMapping[str, MutableMapping[str, list[str]]] + """Components for parsing and rendering.""" + + +class OptionsDict(MutableMappingABC): # type: ignore + """A dictionary, with attribute access to core markdownit configuration options.""" + # Note: ideally we would probably just remove attribute access entirely, + # but we keep it for backwards compatibility. -class OptionsDict(dict): - """A dictionary, with attribute access to core markdownit configuration options.""" + def __init__(self, options: OptionsType) -> None: + self._options = cast(OptionsType, dict(options)) + + def __getitem__(self, key: str) -> Any: + return self._options[key] # type: ignore[literal-required] + + def __setitem__(self, key: str, value: Any) -> None: + self._options[key] = value # type: ignore[literal-required] + + def __delitem__(self, key: str) -> None: + del self._options[key] # type: ignore + + def __iter__(self) -> Iterable[str]: # type: ignore + return iter(self._options) + + def __len__(self) -> int: + return len(self._options) + + def __repr__(self) -> str: + return repr(self._options) + + def __str__(self) -> str: + return str(self._options) @property def maxNesting(self) -> int: """Internal protection, recursion limit.""" - return self["maxNesting"] + return self._options["maxNesting"] @maxNesting.setter - def maxNesting(self, value: int): - self["maxNesting"] = value + def maxNesting(self, value: int) -> None: + self._options["maxNesting"] = value @property def html(self) -> bool: """Enable HTML tags in source.""" - return self["html"] + return self._options["html"] @html.setter - def html(self, value: bool): - self["html"] = value + def html(self, value: bool) -> None: + self._options["html"] = value @property def linkify(self) -> bool: """Enable autoconversion of URL-like texts to links.""" - return self["linkify"] + return self._options["linkify"] @linkify.setter - def linkify(self, value: bool): - self["linkify"] = value + def linkify(self, value: bool) -> None: + self._options["linkify"] = value @property def typographer(self) -> bool: """Enable smartquotes and replacements.""" - return self["typographer"] + return self._options["typographer"] @typographer.setter - def typographer(self, value: bool): - self["typographer"] = value + def typographer(self, value: bool) -> None: + self._options["typographer"] = value @property def quotes(self) -> str: """Quote characters.""" - return self["quotes"] + return self._options["quotes"] @quotes.setter - def quotes(self, value: str): - self["quotes"] = value + def quotes(self, value: str) -> None: + self._options["quotes"] = value @property def xhtmlOut(self) -> bool: """Use '/' to close single tags (
).""" - return self["xhtmlOut"] + return self._options["xhtmlOut"] @xhtmlOut.setter - def xhtmlOut(self, value: bool): - self["xhtmlOut"] = value + def xhtmlOut(self, value: bool) -> None: + self._options["xhtmlOut"] = value @property def breaks(self) -> bool: """Convert newlines in paragraphs into
.""" - return self["breaks"] + return self._options["breaks"] @breaks.setter - def breaks(self, value: bool): - self["breaks"] = value + def breaks(self, value: bool) -> None: + self._options["breaks"] = value @property def langPrefix(self) -> str: """CSS language prefix for fenced blocks.""" - return self["langPrefix"] + return self._options["langPrefix"] @langPrefix.setter - def langPrefix(self, value: str): - self["langPrefix"] = value + def langPrefix(self, value: str) -> None: + self._options["langPrefix"] = value @property def highlight(self) -> Callable[[str, str, str], str] | None: """Highlighter function: (content, langName, langAttrs) -> escaped HTML.""" - return self["highlight"] + return self._options["highlight"] @highlight.setter - def highlight(self, value: Callable[[str, str, str], str] | None): - self["highlight"] = value + def highlight(self, value: Callable[[str, str, str], str] | None) -> None: + self._options["highlight"] = value -def read_fixture_file(path: str | Path) -> list[list]: +def read_fixture_file(path: str | Path) -> list[list[Any]]: text = Path(path).read_text(encoding="utf-8") tests = [] section = 0 @@ -109,14 +174,3 @@ def read_fixture_file(path: str | Path) -> list[list]: last_pos = i return tests - - -def _removesuffix(string: str, suffix: str) -> str: - """Remove a suffix from a string. - - Replace this with str.removesuffix() from stdlib when minimum Python - version is 3.9. - """ - if suffix and string.endswith(suffix): - return string[: -len(suffix)] - return string diff --git a/pyproject.toml b/pyproject.toml index da8d9170..acf2a288 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -87,9 +87,12 @@ force_sort_within_sections = true show_error_codes = true warn_unused_ignores = true warn_redundant_casts = true -no_implicit_optional = true -strict_equality = true -implicit_reexport = false +strict = true + +[[tool.mypy.overrides]] +module = ["tests.*"] +disallow_untyped_calls = false +disallow_untyped_defs = false [[tool.mypy.overrides]] module = ["tests.test_plugins.*", "markdown.*"] diff --git a/tests/test_api/test_main.py b/tests/test_api/test_main.py index 007259e3..c3a9ac8b 100644 --- a/tests/test_api/test_main.py +++ b/tests/test_api/test_main.py @@ -150,7 +150,7 @@ def test_parseInline(): type="inline", tag="", nesting=0, - attrs=None, + attrs={}, map=[0, 1], level=0, children=[ @@ -158,7 +158,7 @@ def test_parseInline(): type="text", tag="", nesting=0, - attrs=None, + attrs={}, map=None, level=0, children=None, @@ -173,7 +173,7 @@ def test_parseInline(): type="softbreak", tag="br", nesting=0, - attrs=None, + attrs={}, map=None, level=0, children=None, @@ -188,7 +188,7 @@ def test_parseInline(): type="softbreak", tag="br", nesting=0, - attrs=None, + attrs={}, map=None, level=0, children=None, @@ -203,7 +203,7 @@ def test_parseInline(): type="text", tag="", nesting=0, - attrs=None, + attrs={}, map=None, level=0, children=None, @@ -239,7 +239,7 @@ def test_emptyStr(): type="inline", tag="", nesting=0, - attrs=None, + attrs={}, map=[0, 1], level=0, children=[], @@ -257,7 +257,7 @@ def test_empty_env(): """Test that an empty `env` is mutated, not copied and mutated.""" md = MarkdownIt() - env = {} + env = {} # type: ignore md.render("[foo]: /url\n[foo]", env) assert "references" in env diff --git a/tests/test_api/test_token.py b/tests/test_api/test_token.py index e3806b50..44035981 100644 --- a/tests/test_api/test_token.py +++ b/tests/test_api/test_token.py @@ -24,7 +24,7 @@ def test_token(): assert token.attrGet("a") == "b" token.attrJoin("a", 
"c") assert token.attrGet("a") == "b c" - token.attrPush(["x", "y"]) + token.attrPush(("x", "y")) assert token.attrGet("x") == "y" with warnings.catch_warnings(): warnings.simplefilter("ignore") diff --git a/tests/test_linkify.py b/tests/test_linkify.py index 96d506d1..48b1981c 100644 --- a/tests/test_linkify.py +++ b/tests/test_linkify.py @@ -6,6 +6,7 @@ def test_token_levels(): tokens = mdit.parse("www.python.org") inline = tokens[1] assert inline.type == "inline" + assert inline.children link_open = inline.children[0] assert link_open.type == "link_open" link_text = inline.children[1] diff --git a/tests/test_port/test_references.py b/tests/test_port/test_references.py index 75bf7130..97f8a65a 100644 --- a/tests/test_port/test_references.py +++ b/tests/test_port/test_references.py @@ -4,7 +4,7 @@ def test_ref_definitions(): md = MarkdownIt() src = "[a]: abc\n\n[b]: xyz\n\n[b]: ijk" - env = {} + env = {} # type: ignore tokens = md.parse(src, env) assert tokens == [] assert env == { diff --git a/tests/test_tree.py b/tests/test_tree.py index 7a7d605e..c5203b0b 100644 --- a/tests/test_tree.py +++ b/tests/test_tree.py @@ -20,7 +20,7 @@ def test_property_passthrough(): tree = SyntaxTreeNode(tokens) heading_node = tree.children[0] assert heading_open.tag == heading_node.tag - assert tuple(heading_open.map) == heading_node.map + assert tuple(heading_open.map or ()) == heading_node.map assert heading_open.level == heading_node.level assert heading_open.content == heading_node.content assert heading_open.markup == heading_node.markup @@ -49,11 +49,13 @@ def test_sibling_traverse(): text_node = paragraph_inline_node.children[0] assert text_node.type == "text" strong_node = text_node.next_sibling + assert strong_node assert strong_node.type == "strong" another_text_node = strong_node.next_sibling + assert another_text_node assert another_text_node.type == "text" assert another_text_node.next_sibling is None - assert another_text_node.previous_sibling.previous_sibling == text_node + assert another_text_node.previous_sibling.previous_sibling == text_node # type: ignore assert text_node.previous_sibling is None From 9251695727cfa948bb18fc76a5dc85495cacc361 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Wed, 31 May 2023 21:18:09 +0200 Subject: [PATCH 41/97] =?UTF-8?q?=F0=9F=91=8C=20Centralise=20indented=20co?= =?UTF-8?q?de=20block=20test=20(#260)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For CommonMark, the presence of indented code blocks prevent any other block element from having an indent of greater than 4 spaces. Certain Markdown flavors and derivatives, such as mdx and djot, disable these code blocks though, since it is more common to use code fences and/or arbitrary indenting is desirable. Currently, disabling code blocks does not remove the indent limitation, since most block elements have the 3 space limitation hard-coded. This commit therefore centralises the logic of applying this limitation, and only applies it when indented code blocks are enabled. Note, this is a potential breaking change and divergence from upstream markdown-it, for this niche case, but I feel makes sense and could even be upstreamed. 
--- markdown_it/rules_block/blockquote.py | 3 +- markdown_it/rules_block/code.py | 4 +- markdown_it/rules_block/fence.py | 6 +- markdown_it/rules_block/heading.py | 3 +- markdown_it/rules_block/hr.py | 3 +- markdown_it/rules_block/html_block.py | 3 +- markdown_it/rules_block/lheading.py | 3 +- markdown_it/rules_block/list.py | 6 +- markdown_it/rules_block/reference.py | 3 +- markdown_it/rules_block/state_block.py | 9 +++ markdown_it/rules_block/table.py | 7 +- .../test_port/fixtures/disable_code_block.md | 69 +++++++++++++++++++ tests/test_port/test_fixtures.py | 11 +++ 13 files changed, 104 insertions(+), 26 deletions(-) create mode 100644 tests/test_port/fixtures/disable_code_block.md diff --git a/markdown_it/rules_block/blockquote.py b/markdown_it/rules_block/blockquote.py index 3ca0321c..da57dfa5 100644 --- a/markdown_it/rules_block/blockquote.py +++ b/markdown_it/rules_block/blockquote.py @@ -18,8 +18,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> pos = state.bMarks[startLine] + state.tShift[startLine] max = state.eMarks[startLine] - # if it's indented more than 3 spaces, it should be a code block - if (state.sCount[startLine] - state.blkIndent) >= 4: + if state.is_code_block(startLine): return False # check the block quote marker diff --git a/markdown_it/rules_block/code.py b/markdown_it/rules_block/code.py index 69bd6bdc..89db9cec 100644 --- a/markdown_it/rules_block/code.py +++ b/markdown_it/rules_block/code.py @@ -9,7 +9,7 @@ def code(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool: LOGGER.debug("entering code: %s, %s, %s, %s", state, startLine, endLine, silent) - if state.sCount[startLine] - state.blkIndent < 4: + if not state.is_code_block(startLine): return False last = nextLine = startLine + 1 @@ -19,7 +19,7 @@ def code(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool: nextLine += 1 continue - if state.sCount[nextLine] - state.blkIndent >= 4: + if state.is_code_block(nextLine): nextLine += 1 last = nextLine continue diff --git a/markdown_it/rules_block/fence.py b/markdown_it/rules_block/fence.py index 2bdd95f8..b4b28979 100644 --- a/markdown_it/rules_block/fence.py +++ b/markdown_it/rules_block/fence.py @@ -13,8 +13,7 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool pos = state.bMarks[startLine] + state.tShift[startLine] maximum = state.eMarks[startLine] - # if it's indented more than 3 spaces, it should be a code block - if state.sCount[startLine] - state.blkIndent >= 4: + if state.is_code_block(startLine): return False if pos + 3 > maximum: @@ -72,8 +71,7 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool except IndexError: break - if state.sCount[nextLine] - state.blkIndent >= 4: - # closing fence should be indented less than 4 spaces + if state.is_code_block(nextLine): continue pos = state.skipChars(pos, marker) diff --git a/markdown_it/rules_block/heading.py b/markdown_it/rules_block/heading.py index 564e1726..90847f9d 100644 --- a/markdown_it/rules_block/heading.py +++ b/markdown_it/rules_block/heading.py @@ -15,8 +15,7 @@ def heading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bo pos = state.bMarks[startLine] + state.tShift[startLine] maximum = state.eMarks[startLine] - # if it's indented more than 3 spaces, it should be a code block - if state.sCount[startLine] - state.blkIndent >= 4: + if state.is_code_block(startLine): return False ch: int | None = state.srcCharCode[pos] diff --git 
a/markdown_it/rules_block/hr.py b/markdown_it/rules_block/hr.py index 72ea010d..6e6b907b 100644 --- a/markdown_it/rules_block/hr.py +++ b/markdown_it/rules_block/hr.py @@ -16,8 +16,7 @@ def hr(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool: pos = state.bMarks[startLine] + state.tShift[startLine] maximum = state.eMarks[startLine] - # if it's indented more than 3 spaces, it should be a code block - if state.sCount[startLine] - state.blkIndent >= 4: + if state.is_code_block(startLine): return False try: diff --git a/markdown_it/rules_block/html_block.py b/markdown_it/rules_block/html_block.py index 4831f562..dc3cadb1 100644 --- a/markdown_it/rules_block/html_block.py +++ b/markdown_it/rules_block/html_block.py @@ -38,8 +38,7 @@ def html_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> pos = state.bMarks[startLine] + state.tShift[startLine] maximum = state.eMarks[startLine] - # if it's indented more than 3 spaces, it should be a code block - if state.sCount[startLine] - state.blkIndent >= 4: + if state.is_code_block(startLine): return False if not state.md.options.get("html", None): diff --git a/markdown_it/rules_block/lheading.py b/markdown_it/rules_block/lheading.py index a3806f8e..beb56698 100644 --- a/markdown_it/rules_block/lheading.py +++ b/markdown_it/rules_block/lheading.py @@ -15,8 +15,7 @@ def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> b ruler: Ruler = state.md.block.ruler terminatorRules = ruler.getRules("paragraph") - # if it's indented more than 3 spaces, it should be a code block - if state.sCount[startLine] - state.blkIndent >= 4: + if state.is_code_block(startLine): return False oldParentType = state.parentType diff --git a/markdown_it/rules_block/list.py b/markdown_it/rules_block/list.py index 1592b599..eaaccda5 100644 --- a/markdown_it/rules_block/list.py +++ b/markdown_it/rules_block/list.py @@ -102,8 +102,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> isTerminatingParagraph = False tight = True - # if it's indented more than 3 spaces, it should be a code block - if state.sCount[startLine] - state.blkIndent >= 4: + if state.is_code_block(startLine): return False # Special case: @@ -295,8 +294,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> if state.sCount[nextLine] < state.blkIndent: break - # if it's indented more than 3 spaces, it should be a code block - if state.sCount[startLine] - state.blkIndent >= 4: + if state.is_code_block(startLine): break # fail if terminating block found diff --git a/markdown_it/rules_block/reference.py b/markdown_it/rules_block/reference.py index 5689064b..48f12721 100644 --- a/markdown_it/rules_block/reference.py +++ b/markdown_it/rules_block/reference.py @@ -16,8 +16,7 @@ def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) -> maximum = state.eMarks[startLine] nextLine = startLine + 1 - # if it's indented more than 3 spaces, it should be a code block - if state.sCount[startLine] - state.blkIndent >= 4: + if state.is_code_block(startLine): return False if state.srcCharCode[pos] != 0x5B: # /* [ */ diff --git a/markdown_it/rules_block/state_block.py b/markdown_it/rules_block/state_block.py index 7ddf806c..02f8dc9c 100644 --- a/markdown_it/rules_block/state_block.py +++ b/markdown_it/rules_block/state_block.py @@ -116,6 +116,9 @@ def __init__( self.lineMax = len(self.bMarks) - 1 # don't count last fake line + # pre-check if code blocks are enabled, to speed up 
is_code_block method + self._code_enabled = "code" in self.md["block"].ruler.get_active_rules() + def __repr__(self) -> str: return ( f"{self.__class__.__name__}" @@ -228,3 +231,9 @@ def getLines(self, begin: int, end: int, indent: int, keepLastLF: bool) -> str: i += 1 return "".join(queue) + + def is_code_block(self, line: int) -> bool: + """Check if line is a code block, + i.e. the code block rule is enabled and text is indented by more than 3 spaces. + """ + return self._code_enabled and (self.sCount[line] - self.blkIndent) >= 4 diff --git a/markdown_it/rules_block/table.py b/markdown_it/rules_block/table.py index c432d44f..8f7be7f1 100644 --- a/markdown_it/rules_block/table.py +++ b/markdown_it/rules_block/table.py @@ -61,8 +61,7 @@ def table(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool if state.sCount[nextLine] < state.blkIndent: return False - # if it's indented more than 3 spaces, it should be a code block - if state.sCount[nextLine] - state.blkIndent >= 4: + if state.is_code_block(nextLine): return False # first character of the second line should be '|', '-', ':', @@ -126,7 +125,7 @@ def table(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool lineText = getLine(state, startLine).strip() if "|" not in lineText: return False - if state.sCount[startLine] - state.blkIndent >= 4: + if state.is_code_block(startLine): return False columns = escapedSplit(lineText) if columns and columns[0] == "": @@ -192,7 +191,7 @@ def table(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool lineText = getLine(state, nextLine).strip() if not lineText: break - if state.sCount[nextLine] - state.blkIndent >= 4: + if state.is_code_block(nextLine): break columns = escapedSplit(lineText) if columns and columns[0] == "": diff --git a/tests/test_port/fixtures/disable_code_block.md b/tests/test_port/fixtures/disable_code_block.md new file mode 100644 index 00000000..35cf925c --- /dev/null +++ b/tests/test_port/fixtures/disable_code_block.md @@ -0,0 +1,69 @@ +indent paragraph +. + This is a paragraph, + with multiple lines. + + This paragraph +has variable indents, + like this. +. +

<p>This is a paragraph,
+with multiple lines.</p>
+<p>This paragraph
+has variable indents,
+like this.</p>
+.
+
+indent in HTML
+.
+<div>
+
+    Paragraph
+
+</div>
+.
+<div>
+<p>Paragraph</p>
+</div>
+.
+
+indent fence
+.
+    ```python
+    def foo():
+        pass
+    ```
+.
+<pre><code class="language-python">def foo():
+    pass
+</code></pre>
+.
+
+indent heading
+.
+    # Heading
+.
+<h1>Heading</h1>
+.
+
+indent table
+.
+    | foo | bar |
+    | --- | --- |
+    | baz | bim |
+.
+<table>
+<thead>
+<tr>
+<th>foo</th>
+<th>bar</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>baz</td>
+<td>bim</td>
+</tr>
+</tbody>
+</table>
+. diff --git a/tests/test_port/test_fixtures.py b/tests/test_port/test_fixtures.py index d2199caf..74c7ee4d 100644 --- a/tests/test_port/test_fixtures.py +++ b/tests/test_port/test_fixtures.py @@ -104,6 +104,17 @@ def test_strikethrough(line, title, input, expected): assert text.rstrip() == expected.rstrip() +@pytest.mark.parametrize( + "line,title,input,expected", + read_fixture_file(FIXTURE_PATH.joinpath("disable_code_block.md")), +) +def test_disable_code_block(line, title, input, expected): + md = MarkdownIt().enable("table").disable("code") + text = md.render(input) + print(text.rstrip()) + assert text.rstrip() == expected.rstrip() + + @pytest.mark.parametrize( "line,title,input,expected", read_fixture_file(FIXTURE_PATH.joinpath("issue-fixes.md")), From 798b9d02bcc80b31773f68c05e5a1a82617d2798 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Wed, 31 May 2023 22:03:04 +0200 Subject: [PATCH 42/97] =?UTF-8?q?=F0=9F=94=A7=20Move=20linting=20from=20fl?= =?UTF-8?q?ake8=20to=20ruff=20(#268)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 9 ++--- docs/conf.py | 2 +- markdown_it/common/normalize_url.py | 47 +++++++++++------------ markdown_it/main.py | 10 ++--- markdown_it/presets/__init__.py | 2 +- markdown_it/renderer.py | 25 ++++++------ markdown_it/ruler.py | 2 +- markdown_it/rules_block/fence.py | 5 +-- markdown_it/rules_block/list.py | 34 ++++++++-------- markdown_it/rules_block/reference.py | 23 ++++++----- markdown_it/rules_block/state_block.py | 9 +++-- markdown_it/rules_core/replacements.py | 47 ++++++++++++----------- markdown_it/rules_core/smartquotes.py | 16 ++++---- markdown_it/rules_inline/balance_pairs.py | 10 +++-- markdown_it/rules_inline/entity.py | 11 +++--- markdown_it/rules_inline/state_inline.py | 21 ++++------ markdown_it/token.py | 2 +- pyproject.toml | 5 +++ tests/test_cli.py | 5 +-- tox.ini | 4 -- 20 files changed, 141 insertions(+), 148 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 49f45ed2..2aecdc6d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -33,14 +33,13 @@ repos: hooks: - id: black - - repo: https://github.com/PyCQA/flake8 - rev: 6.0.0 + - repo: https://github.com/charliermarsh/ruff-pre-commit + rev: v0.0.270 hooks: - - id: flake8 - additional_dependencies: [flake8-bugbear~=22.7] + - id: ruff - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.2.0 + rev: v1.3.0 hooks: - id: mypy additional_dependencies: [mdurl] diff --git a/docs/conf.py b/docs/conf.py index e0a6e621..6a6ee557 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -121,7 +121,7 @@ def run_apidoc(app): shutil.rmtree(api_folder) os.mkdir(api_folder) - argv = ["-M", "--separate", "-o", api_folder, module_path] + ignore_paths + argv = ["-M", "--separate", "-o", api_folder, module_path, *ignore_paths] apidoc.OPTIONS.append("ignore-module-all") apidoc.main(argv) diff --git a/markdown_it/common/normalize_url.py b/markdown_it/common/normalize_url.py index a4ebbaae..92720b31 100644 --- a/markdown_it/common/normalize_url.py +++ b/markdown_it/common/normalize_url.py @@ -1,6 +1,7 @@ from __future__ import annotations from collections.abc import Callable +from contextlib import suppress import re from urllib.parse import quote, unquote, urlparse, urlunparse # noqa: F401 @@ -21,18 +22,17 @@ def normalizeLink(url: str) -> str: """ parsed = mdurl.parse(url, slashes_denote_host=True) - if parsed.hostname: - # Encode hostnames in urls like: - # `http://host/`, `https://host/`, 
`mailto:user@host`, `//host/` - # - # We don't encode unknown schemas, because it's likely that we encode - # something we shouldn't (e.g. `skype:name` treated as `skype:host`) - # - if not parsed.protocol or parsed.protocol in RECODE_HOSTNAME_FOR: - try: - parsed = parsed._replace(hostname=_punycode.to_ascii(parsed.hostname)) - except Exception: - pass + # Encode hostnames in urls like: + # `http://host/`, `https://host/`, `mailto:user@host`, `//host/` + # + # We don't encode unknown schemas, because it's likely that we encode + # something we shouldn't (e.g. `skype:name` treated as `skype:host`) + # + if parsed.hostname and ( + not parsed.protocol or parsed.protocol in RECODE_HOSTNAME_FOR + ): + with suppress(Exception): + parsed = parsed._replace(hostname=_punycode.to_ascii(parsed.hostname)) return mdurl.encode(mdurl.format(parsed)) @@ -47,18 +47,17 @@ def normalizeLinkText(url: str) -> str: """ parsed = mdurl.parse(url, slashes_denote_host=True) - if parsed.hostname: - # Encode hostnames in urls like: - # `http://host/`, `https://host/`, `mailto:user@host`, `//host/` - # - # We don't encode unknown schemas, because it's likely that we encode - # something we shouldn't (e.g. `skype:name` treated as `skype:host`) - # - if not parsed.protocol or parsed.protocol in RECODE_HOSTNAME_FOR: - try: - parsed = parsed._replace(hostname=_punycode.to_unicode(parsed.hostname)) - except Exception: - pass + # Encode hostnames in urls like: + # `http://host/`, `https://host/`, `mailto:user@host`, `//host/` + # + # We don't encode unknown schemas, because it's likely that we encode + # something we shouldn't (e.g. `skype:name` treated as `skype:host`) + # + if parsed.hostname and ( + not parsed.protocol or parsed.protocol in RECODE_HOSTNAME_FOR + ): + with suppress(Exception): + parsed = parsed._replace(hostname=_punycode.to_unicode(parsed.hostname)) # add '%' to exclude list because of https://github.com/markdown-it/markdown-it/issues/720 return mdurl.decode(mdurl.format(parsed), mdurl.DECODE_DEFAULT_CHARS + "%") diff --git a/markdown_it/main.py b/markdown_it/main.py index acf8d079..243e1509 100644 --- a/markdown_it/main.py +++ b/markdown_it/main.py @@ -4,11 +4,11 @@ from contextlib import contextmanager from typing import Any, Literal, overload -from . import helpers, presets # noqa F401 -from .common import normalize_url, utils # noqa F401 -from .parser_block import ParserBlock # noqa F401 -from .parser_core import ParserCore # noqa F401 -from .parser_inline import ParserInline # noqa F401 +from . import helpers, presets +from .common import normalize_url, utils +from .parser_block import ParserBlock +from .parser_core import ParserCore +from .parser_inline import ParserInline from .renderer import RendererHTML, RendererProtocol from .rules_core.state_core import StateCore from .token import Token diff --git a/markdown_it/presets/__init__.py b/markdown_it/presets/__init__.py index 22cf74cb..f1cb0507 100644 --- a/markdown_it/presets/__init__.py +++ b/markdown_it/presets/__init__.py @@ -6,7 +6,7 @@ js_default = default -class gfm_like: +class gfm_like: # noqa: N801 """GitHub Flavoured Markdown (GFM) like. This adds the linkify, table and strikethrough components to CommmonMark. 
diff --git a/markdown_it/renderer.py b/markdown_it/renderer.py index 4cddbc67..7fee9ffa 100644 --- a/markdown_it/renderer.py +++ b/markdown_it/renderer.py @@ -152,19 +152,18 @@ def renderToken( if token.block: needLf = True - if token.nesting == 1: - if idx + 1 < len(tokens): - nextToken = tokens[idx + 1] - - if nextToken.type == "inline" or nextToken.hidden: - # Block-level tag containing an inline tag. - # - needLf = False - - elif nextToken.nesting == -1 and nextToken.tag == token.tag: - # Opening tag + closing tag of the same type. E.g. `
<li></li>`. - # - needLf = False + if token.nesting == 1 and (idx + 1 < len(tokens)): + nextToken = tokens[idx + 1] + + if nextToken.type == "inline" or nextToken.hidden: # noqa: SIM114 + # Block-level tag containing an inline tag. + # + needLf = False + + elif nextToken.nesting == -1 and nextToken.tag == token.tag: + # Opening tag + closing tag of the same type. E.g. `
  • `. + # + needLf = False result += ">\n" if needLf else ">" diff --git a/markdown_it/ruler.py b/markdown_it/ruler.py index 421666cc..8ae32beb 100644 --- a/markdown_it/ruler.py +++ b/markdown_it/ruler.py @@ -30,7 +30,7 @@ class Ruler class StateBase: - srcCharCode: tuple[int, ...] + srcCharCode: tuple[int, ...] # noqa: N815 def __init__(self, src: str, md: MarkdownIt, env: EnvType): self.src = src diff --git a/markdown_it/rules_block/fence.py b/markdown_it/rules_block/fence.py index b4b28979..2051b96b 100644 --- a/markdown_it/rules_block/fence.py +++ b/markdown_it/rules_block/fence.py @@ -38,9 +38,8 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool params = state.src[pos:maximum] # /* ` */ - if marker == 0x60: - if chr(marker) in params: - return False + if marker == 0x60 and chr(marker) in params: + return False # Since start is found, we can report success here in validation mode if silent: diff --git a/markdown_it/rules_block/list.py b/markdown_it/rules_block/list.py index eaaccda5..f1cb089e 100644 --- a/markdown_it/rules_block/list.py +++ b/markdown_it/rules_block/list.py @@ -120,14 +120,17 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> # limit conditions when list can interrupt # a paragraph (validation mode only) - if silent and state.parentType == "paragraph": - # Next list item should still terminate previous list item - # - # This code can fail if plugins use blkIndent as well as lists, - # but I hope the spec gets fixed long before that happens. - # - if state.tShift[startLine] >= state.blkIndent: - isTerminatingParagraph = True + # Next list item should still terminate previous list item + # + # This code can fail if plugins use blkIndent as well as lists, + # but I hope the spec gets fixed long before that happens. + # + if ( + silent + and state.parentType == "paragraph" + and state.tShift[startLine] >= state.blkIndent + ): + isTerminatingParagraph = True # Detect list type and position after marker posAfterMarker = skipOrderedListMarker(state, startLine) @@ -149,9 +152,11 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> # If we're starting a new unordered list right after # a paragraph, first line should not be empty. - if isTerminatingParagraph: - if state.skipSpaces(posAfterMarker) >= state.eMarks[startLine]: - return False + if ( + isTerminatingParagraph + and state.skipSpaces(posAfterMarker) >= state.eMarks[startLine] + ): + return False # We should terminate list on style change. Remember first one to compare. 
markerCharCode = state.srcCharCode[posAfterMarker - 1] @@ -209,11 +214,8 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> contentStart = pos - if contentStart >= maximum: - # trimming space in "- \n 3" case, indent is 1 here - indentAfterMarker = 1 - else: - indentAfterMarker = offset - initial + # trimming space in "- \n 3" case, indent is 1 here + indentAfterMarker = 1 if contentStart >= maximum else offset - initial # If we have more than 4 spaces, the indent is 1 # (the rest is just indented code block) diff --git a/markdown_it/rules_block/reference.py b/markdown_it/rules_block/reference.py index 48f12721..92f0918c 100644 --- a/markdown_it/rules_block/reference.py +++ b/markdown_it/rules_block/reference.py @@ -153,18 +153,17 @@ def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) -> break pos += 1 - if pos < maximum and charCodeAt(string, pos) != 0x0A: - if title: - # garbage at the end of the line after title, - # but it could still be a valid reference if we roll back - title = "" - pos = destEndPos - lines = destEndLineNo - while pos < maximum: - ch = charCodeAt(string, pos) - if not isSpace(ch): - break - pos += 1 + if pos < maximum and charCodeAt(string, pos) != 0x0A and title: + # garbage at the end of the line after title, + # but it could still be a valid reference if we roll back + title = "" + pos = destEndPos + lines = destEndLineNo + while pos < maximum: + ch = charCodeAt(string, pos) + if not isSpace(ch): + break + pos += 1 if pos < maximum and charCodeAt(string, pos) != 0x0A: # garbage at the end of the line diff --git a/markdown_it/rules_block/state_block.py b/markdown_it/rules_block/state_block.py index 02f8dc9c..ee77f097 100644 --- a/markdown_it/rules_block/state_block.py +++ b/markdown_it/rules_block/state_block.py @@ -202,10 +202,11 @@ def getLines(self, begin: int, end: int, indent: int, keepLastLF: bool) -> str: while line < end: lineIndent = 0 lineStart = first = self.bMarks[line] - if line + 1 < end or keepLastLF: - last = self.eMarks[line] + 1 - else: - last = self.eMarks[line] + last = ( + self.eMarks[line] + 1 + if line + 1 < end or keepLastLF + else self.eMarks[line] + ) while (first < last) and (lineIndent < indent): ch = self.srcCharCode[first] diff --git a/markdown_it/rules_core/replacements.py b/markdown_it/rules_core/replacements.py index e5d81c7a..0b6e86af 100644 --- a/markdown_it/rules_core/replacements.py +++ b/markdown_it/rules_core/replacements.py @@ -78,29 +78,30 @@ def replace_rare(inlineTokens: list[Token]) -> None: inside_autolink = 0 for token in inlineTokens: - if token.type == "text" and not inside_autolink: - if RARE_RE.search(token.content): - # +- -> ± - token.content = PLUS_MINUS_RE.sub("±", token.content) - - # .., ..., ....... -> … - token.content = ELLIPSIS_RE.sub("…", token.content) - - # but ?..... & !..... -> ?.. & !.. - token.content = ELLIPSIS_QUESTION_EXCLAMATION_RE.sub( - "\\1..", token.content - ) - token.content = QUESTION_EXCLAMATION_RE.sub("\\1\\1\\1", token.content) - - # ,, ,,, ,,,, -> , - token.content = COMMA_RE.sub(",", token.content) - - # em-dash - token.content = EM_DASH_RE.sub("\\1\u2014", token.content) - - # en-dash - token.content = EN_DASH_RE.sub("\\1\u2013", token.content) - token.content = EN_DASH_INDENT_RE.sub("\\1\u2013", token.content) + if ( + token.type == "text" + and (not inside_autolink) + and RARE_RE.search(token.content) + ): + # +- -> ± + token.content = PLUS_MINUS_RE.sub("±", token.content) + + # .., ..., ....... 
-> … + token.content = ELLIPSIS_RE.sub("…", token.content) + + # but ?..... & !..... -> ?.. & !.. + token.content = ELLIPSIS_QUESTION_EXCLAMATION_RE.sub("\\1..", token.content) + token.content = QUESTION_EXCLAMATION_RE.sub("\\1\\1\\1", token.content) + + # ,, ,,, ,,,, -> , + token.content = COMMA_RE.sub(",", token.content) + + # em-dash + token.content = EM_DASH_RE.sub("\\1\u2014", token.content) + + # en-dash + token.content = EN_DASH_RE.sub("\\1\u2013", token.content) + token.content = EN_DASH_INDENT_RE.sub("\\1\u2013", token.content) if token.type == "link_open" and token.info == "auto": inside_autolink -= 1 diff --git a/markdown_it/rules_core/smartquotes.py b/markdown_it/rules_core/smartquotes.py index b11a5739..b4284493 100644 --- a/markdown_it/rules_core/smartquotes.py +++ b/markdown_it/rules_core/smartquotes.py @@ -100,19 +100,17 @@ def process_inlines(tokens: list[Token], state: StateCore) -> None: isLastWhiteSpace = isWhiteSpace(lastChar) isNextWhiteSpace = isWhiteSpace(nextChar) - if isNextWhiteSpace: + if isNextWhiteSpace: # noqa: SIM114 + canOpen = False + elif isNextPunctChar and not (isLastWhiteSpace or isLastPunctChar): canOpen = False - elif isNextPunctChar: - if not (isLastWhiteSpace or isLastPunctChar): - canOpen = False - if isLastWhiteSpace: + if isLastWhiteSpace: # noqa: SIM114 + canClose = False + elif isLastPunctChar and not (isNextWhiteSpace or isNextPunctChar): canClose = False - elif isLastPunctChar: - if not (isNextWhiteSpace or isNextPunctChar): - canClose = False - if nextChar == 0x22 and t.group(0) == '"': # 0x22: " + if nextChar == 0x22 and t.group(0) == '"': # 0x22: " # noqa: SIM102 if lastChar >= 0x30 and lastChar <= 0x39: # 0x30: 0, 0x39: 9 # special case: 1"" - count first quote as an inch canClose = canOpen = False diff --git a/markdown_it/rules_inline/balance_pairs.py b/markdown_it/rules_inline/balance_pairs.py index ce0a0884..6125de71 100644 --- a/markdown_it/rules_inline/balance_pairs.py +++ b/markdown_it/rules_inline/balance_pairs.py @@ -60,10 +60,12 @@ def processDelimiters(state: StateInline, delimiters: list[Delimiter]) -> None: # closing delimiters must not be a multiple of 3 unless both lengths # are multiples of 3. 
# - if opener.close or closer.open: - if (opener.length + closer.length) % 3 == 0: - if opener.length % 3 != 0 or closer.length % 3 != 0: - isOddMatch = True + if ( + (opener.close or closer.open) + and ((opener.length + closer.length) % 3 == 0) + and (opener.length % 3 != 0 or closer.length % 3 != 0) + ): + isOddMatch = True if not isOddMatch: # If previous delimiter cannot be an opener, we can safely skip diff --git a/markdown_it/rules_inline/entity.py b/markdown_it/rules_inline/entity.py index 9c4c6a0e..1e5d0ea0 100644 --- a/markdown_it/rules_inline/entity.py +++ b/markdown_it/rules_inline/entity.py @@ -40,12 +40,11 @@ def entity(state: StateInline, silent: bool) -> bool: else: match = NAMED_RE.search(state.src[pos:]) - if match: - if match.group(1) in entities: - if not silent: - state.pending += entities[match.group(1)] - state.pos += len(match.group(0)) - return True + if match and match.group(1) in entities: + if not silent: + state.pending += entities[match.group(1)] + state.pos += len(match.group(0)) + return True if not silent: state.pending += "&" diff --git a/markdown_it/rules_inline/state_inline.py b/markdown_it/rules_inline/state_inline.py index 7c1cb1f3..12e1d934 100644 --- a/markdown_it/rules_inline/state_inline.py +++ b/markdown_it/rules_inline/state_inline.py @@ -131,8 +131,6 @@ def scanDelims(self, start: int, canSplitWord: bool) -> Scanned: """ pos = start - left_flanking = True - right_flanking = True maximum = self.posMax marker = self.srcCharCode[start] @@ -153,17 +151,14 @@ def scanDelims(self, start: int, canSplitWord: bool) -> Scanned: isLastWhiteSpace = isWhiteSpace(lastChar) isNextWhiteSpace = isWhiteSpace(nextChar) - if isNextWhiteSpace: - left_flanking = False - elif isNextPunctChar: - if not (isLastWhiteSpace or isLastPunctChar): - left_flanking = False - - if isLastWhiteSpace: - right_flanking = False - elif isLastPunctChar: - if not (isNextWhiteSpace or isNextPunctChar): - right_flanking = False + left_flanking = not ( + isNextWhiteSpace + or (isNextPunctChar and not (isLastWhiteSpace or isLastPunctChar)) + ) + right_flanking = not ( + isLastWhiteSpace + or (isLastPunctChar and not (isNextWhiteSpace or isNextPunctChar)) + ) if not canSplitWord: can_open = left_flanking and ((not right_flanking) or isLastPunctChar) diff --git a/markdown_it/token.py b/markdown_it/token.py index e3f6c9b9..90008b72 100644 --- a/markdown_it/token.py +++ b/markdown_it/token.py @@ -80,7 +80,7 @@ def __post_init__(self) -> None: self.attrs = convert_attrs(self.attrs) def attrIndex(self, name: str) -> int: - warnings.warn( + warnings.warn( # noqa: B028 "Token.attrIndex should not be used, since Token.attrs is a dictionary", UserWarning, ) diff --git a/pyproject.toml b/pyproject.toml index acf2a288..22b220c8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,6 +83,11 @@ exclude = [ profile = "black" force_sort_within_sections = true +[tool.ruff] +line-length = 100 +extend-select = ["B0", "C4", "ICN", "ISC", "N", "RUF", "SIM"] +extend-ignore = ["ISC003", "N802", "N803", "N806", "N816", "RUF003"] + [tool.mypy] show_error_codes = true warn_unused_ignores = true diff --git a/tests/test_cli.py b/tests/test_cli.py index c38e24fd..ed8d8205 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -37,7 +37,6 @@ def test_interactive(): def mock_input(prompt): raise KeyboardInterrupt - with patch("builtins.print") as patched: - with patch("builtins.input", mock_input): - parse.interactive() + with patch("builtins.print") as patched, patch("builtins.input", mock_input): + 
parse.interactive() patched.assert_called() diff --git a/tox.ini b/tox.ini index 251e18df..59ea5f9e 100644 --- a/tox.ini +++ b/tox.ini @@ -66,7 +66,3 @@ description = run fuzzer on testcase file deps = atheris commands_pre = python scripts/build_fuzzers.py {envdir}/oss-fuzz commands = python {envdir}/oss-fuzz/infra/helper.py reproduce markdown-it-py fuzz_markdown {posargs:testcase} - -[flake8] -max-line-length = 100 -extend-ignore = E203 From c6754a2fda48f19312a1e73cbb53e7a355f36165 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Wed, 31 May 2023 22:32:18 +0200 Subject: [PATCH 43/97] =?UTF-8?q?=F0=9F=93=9A=20DOCS:=20re-activate=20code?= =?UTF-8?q?=20cells=20(#269)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/_static/custom.css | 2 +- docs/conf.py | 15 +--------- docs/using.md | 66 ++++++++++++++++++++--------------------- pyproject.toml | 1 + 4 files changed, 36 insertions(+), 48 deletions(-) diff --git a/docs/_static/custom.css b/docs/_static/custom.css index 9a16010b..a6c44314 100644 --- a/docs/_static/custom.css +++ b/docs/_static/custom.css @@ -1,4 +1,4 @@ -.code-cell > .highlight > pre { +.cell_output > .output > .highlight > pre { border-left-color: green; border-left-width: medium; border-left-style: solid; diff --git a/docs/conf.py b/docs/conf.py index 6a6ee557..2b48df1e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -36,6 +36,7 @@ "myst_parser", "sphinx_copybutton", "sphinx_design", + "jupyter_sphinx", ] # List of patterns, relative to source directory, that match files and @@ -135,17 +136,3 @@ def setup(app): """Add functions to the Sphinx setup.""" if os.environ.get("SKIP_APIDOC", None) is None: app.connect("builder-inited", run_apidoc) - - from sphinx.directives.code import CodeBlock - - class CodeCell(CodeBlock): - """Custom code block directive.""" - - def run(self): - """Run the directive.""" - self.options["class"] = ["code-cell"] - return super().run() - - # note, these could be run by myst-nb, - # but currently this causes a circular dependency issue - app.add_directive("code-cell", CodeCell) diff --git a/docs/using.md b/docs/using.md index aa632574..e2cf7e7e 100644 --- a/docs/using.md +++ b/docs/using.md @@ -27,17 +27,17 @@ then these are converted to other formats using 'renderers'. The simplest way to understand how text will be parsed is using: -```{code-cell} python +```{jupyter-execute} from pprint import pprint from markdown_it import MarkdownIt ``` -```{code-cell} python +```{jupyter-execute} md = MarkdownIt() md.render("some *text*") ``` -```{code-cell} python +```{jupyter-execute} for token in md.parse("some *text*"): print(token) print() @@ -59,24 +59,24 @@ You can define this configuration *via* directly supplying a dictionary or a pre Compared to `commonmark`, it enables the table, strikethrough and linkify components. **Important**, to use this configuration you must have `linkify-it-py` installed. -```{code-cell} python +```{jupyter-execute} from markdown_it.presets import zero zero.make() ``` -```{code-cell} python +```{jupyter-execute} md = MarkdownIt("zero") md.options ``` You can also override specific options: -```{code-cell} python +```{jupyter-execute} md = MarkdownIt("zero", {"maxNesting": 99}) md.options ``` -```{code-cell} python +```{jupyter-execute} pprint(md.get_active_rules()) ``` @@ -84,23 +84,23 @@ You can find all the parsing rules in the source code: `parser_core.py`, `parser_block.py`, `parser_inline.py`. 
-```{code-cell} python +```{jupyter-execute} pprint(md.get_all_rules()) ``` Any of the parsing rules can be enabled/disabled, and these methods are "chainable": -```{code-cell} python +```{jupyter-execute} md.render("- __*emphasise this*__") ``` -```{code-cell} python +```{jupyter-execute} md.enable(["list", "emphasis"]).render("- __*emphasise this*__") ``` You can temporarily modify rules with the `reset_rules` context manager. -```{code-cell} python +```{jupyter-execute} with md.reset_rules(): md.disable("emphasis") print(md.render("__*emphasise this*__")) @@ -109,7 +109,7 @@ md.render("__*emphasise this*__") Additionally `renderInline` runs the parser with all block syntax rules disabled. -```{code-cell} python +```{jupyter-execute} md.renderInline("__*emphasise this*__") ``` @@ -140,7 +140,7 @@ The `smartquotes` and `replacements` components are intended to improve typograp Both of these components require typography to be turned on, as well as the components enabled: -```{code-cell} python +```{jupyter-execute} md = MarkdownIt("commonmark", {"typographer": True}) md.enable(["replacements", "smartquotes"]) md.render("'single quotes' (c)") @@ -151,7 +151,7 @@ md.render("'single quotes' (c)") The `linkify` component requires that [linkify-it-py](https://github.com/tsutsu3/linkify-it-py) be installed (e.g. *via* `pip install markdown-it-py[linkify]`). This allows URI autolinks to be identified, without the need for enclosing in `<>` brackets: -```{code-cell} python +```{jupyter-execute} md = MarkdownIt("commonmark", {"linkify": True}) md.enable(["linkify"]) md.render("github.com") @@ -163,7 +163,7 @@ Plugins load collections of additional syntax rules and render methods into the A number of useful plugins are available in [`mdit_py_plugins`](https://github.com/executablebooks/mdit-py-plugins) (see [the plugin list](./plugins.md)), or you can create your own (following the [markdown-it design principles](./architecture.md)). 
-```{code-cell} python +```{jupyter-execute} from markdown_it import MarkdownIt import mdit_py_plugins from mdit_py_plugins.front_matter import front_matter_plugin @@ -175,7 +175,7 @@ md = ( .use(footnote_plugin) .enable('table') ) -text = (""" +text = ("""\ --- a: 1 --- @@ -188,7 +188,7 @@ A footnote [^1] [^1]: some details """) -md.render(text) +print(md.render(text)) ``` ## The Token Stream @@ -197,7 +197,7 @@ md.render(text) Before rendering, the text is parsed to a flat token stream of block level syntax elements, with nesting defined by opening (1) and closing (-1) attributes: -```{code-cell} python +```{jupyter-execute} md = MarkdownIt("commonmark") tokens = md.parse(""" Here's some *text* @@ -211,17 +211,17 @@ Here's some *text* Naturally all openings should eventually be closed, such that: -```{code-cell} python +```{jupyter-execute} sum([t.nesting for t in tokens]) == 0 ``` All tokens are the same class, which can also be created outside the parser: -```{code-cell} python +```{jupyter-execute} tokens[0] ``` -```{code-cell} python +```{jupyter-execute} from markdown_it.token import Token token = Token("paragraph_open", "p", 1, block=True, map=[1, 2]) token == tokens[0] @@ -229,19 +229,19 @@ token == tokens[0] The `'inline'` type token contain the inline tokens as children: -```{code-cell} python +```{jupyter-execute} tokens[1] ``` You can serialize a token (and its children) to a JSONable dictionary using: -```{code-cell} python +```{jupyter-execute} print(tokens[1].as_dict()) ``` This dictionary can also be deserialized: -```{code-cell} python +```{jupyter-execute} Token.from_dict(tokens[1].as_dict()) ``` @@ -254,7 +254,7 @@ Token.from_dict(tokens[1].as_dict()) In some use cases it may be useful to convert the token stream into a syntax tree, with opening/closing tokens collapsed into a single token that contains children. -```{code-cell} python +```{jupyter-execute} from markdown_it.tree import SyntaxTreeNode md = MarkdownIt("commonmark") @@ -274,11 +274,11 @@ print(node.pretty(indent=2, show_text=True)) You can then use methods to traverse the tree -```{code-cell} python +```{jupyter-execute} node.children ``` -```{code-cell} python +```{jupyter-execute} print(node[0]) node[0].next_sibling ``` @@ -302,7 +302,7 @@ def function(renderer, tokens, idx, options, env): You can inject render methods into the instantiated render class. -```{code-cell} python +```{jupyter-execute} md = MarkdownIt("commonmark") def render_em_open(self, tokens, idx, options, env): @@ -319,7 +319,7 @@ Also `add_render_rule` method is specific to Python, rather than adding directly You can also subclass a render and add the method there: -```{code-cell} python +```{jupyter-execute} from markdown_it.renderer import RendererHTML class MyRenderer(RendererHTML): @@ -332,7 +332,7 @@ md.render("*a*") Plugins can support multiple render types, using the `__output__` attribute (this is currently a Python only feature). 
-```{code-cell} python +```{jupyter-execute} from markdown_it.renderer import RendererHTML class MyRenderer1(RendererHTML): @@ -358,7 +358,7 @@ print(md.render("*a*")) Here's a more concrete example; let's replace images with vimeo links to player's iframe: -```{code-cell} python +```{jupyter-execute} import re from markdown_it import MarkdownIt @@ -384,7 +384,7 @@ print(md.render("![](https://www.vimeo.com/123)")) Here is another example, how to add `target="_blank"` to all links: -```{code-cell} python +```{jupyter-execute} from markdown_it import MarkdownIt def render_blank_link(self, tokens, idx, options, env): @@ -402,7 +402,7 @@ print(md.render("[a]\n\n[a]: b")) You can also render a token stream directly to markdown via the `MDRenderer` class from [`mdformat`](https://github.com/executablebooks/mdformat): -```{code-cell} python +```python from markdown_it import MarkdownIt from mdformat.renderer import MDRenderer diff --git a/pyproject.toml b/pyproject.toml index 22b220c8..b0d64fb1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,6 +52,7 @@ rtd = [ "sphinx-copybutton", "sphinx-design", "sphinx_book_theme", + "jupyter_sphinx", ] testing = [ "coverage", From f52249e1c26c7e66c8504848f582fcd3de85ab3d Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Thu, 1 Jun 2023 03:45:46 +0200 Subject: [PATCH 44/97] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20REFACTOR:=20Replace?= =?UTF-8?q?=20character=20codes=20with=20strings=20(#270)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The use of `StateBase.srcCharCode` is deprecated (with backward-compatibility), and all core uses are replaced by `StateBase.src`. Conversion of source string characters to an integer representing the Unicode character is prevalent in the upstream JavaScript implementation, to improve performance. However, it is unnecessary in Python and leads to harder to read code and performance deprecations (during the conversion in the `StateBase` initialisation). `StateBase.srcCharCode` is no longer populated on initiation, but is left as an on-demand, cached property, to allow backward compatibility for plugins (deprecation warnings are emitted to identify where updates are required). 
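As a minimal illustration of the change (not taken from the diff itself), a check that previously went through character codes can now operate on the source string directly:

```python
from markdown_it.common.utils import isSpace, isStrSpace  # isStrSpace is added by this change

src = " emphasised"
pos = 0

# Old style: take the integer code point and use the int-based helper.
assert isSpace(ord(src[pos]))

# New style: index the source string and use the new str-based helper.
assert isStrSpace(src[pos])
```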
`isStrSpace` is supplied as a replacement for `isSpace`, and similarly `StateBlock.skipCharsStr`/`StateBlock.skipCharsStrBack` replace `StateBlock.skipChars`/`StateBlock.skipCharsBack` Co-authored-by: Taneli Hukkinen <3275109+hukkin@users.noreply.github.com> --- markdown_it/common/utils.py | 34 ++++++++-- markdown_it/helpers/parse_link_destination.py | 2 +- markdown_it/helpers/parse_link_label.py | 6 +- markdown_it/main.py | 2 +- markdown_it/parser_block.py | 9 +-- markdown_it/port.yaml | 3 +- markdown_it/ruler.py | 16 ++++- markdown_it/rules_block/blockquote.py | 32 +++++----- markdown_it/rules_block/fence.py | 14 ++--- markdown_it/rules_block/heading.py | 20 +++--- markdown_it/rules_block/hr.py | 14 ++--- markdown_it/rules_block/html_block.py | 2 +- markdown_it/rules_block/lheading.py | 13 ++-- markdown_it/rules_block/list.py | 48 +++++++------- markdown_it/rules_block/reference.py | 6 +- markdown_it/rules_block/state_block.py | 59 +++++++++-------- markdown_it/rules_block/table.py | 33 +++++----- markdown_it/rules_core/block.py | 4 +- markdown_it/rules_core/smartquotes.py | 25 ++++---- markdown_it/rules_inline/autolink.py | 8 +-- markdown_it/rules_inline/backticks.py | 8 +-- markdown_it/rules_inline/emphasis.py | 11 ++-- markdown_it/rules_inline/entity.py | 6 +- markdown_it/rules_inline/escape.py | 52 +++++++++++---- markdown_it/rules_inline/html_inline.py | 11 +--- markdown_it/rules_inline/image.py | 30 +++++---- markdown_it/rules_inline/link.py | 22 +++---- markdown_it/rules_inline/newline.py | 11 ++-- markdown_it/rules_inline/state_inline.py | 16 ++--- markdown_it/rules_inline/strikethrough.py | 21 +++---- markdown_it/rules_inline/text.py | 63 +++++++++---------- scripts/profiler.py | 2 +- 32 files changed, 321 insertions(+), 282 deletions(-) diff --git a/markdown_it/common/utils.py b/markdown_it/common/utils.py index ed862e74..4effc00f 100644 --- a/markdown_it/common/utils.py +++ b/markdown_it/common/utils.py @@ -4,12 +4,12 @@ import html import re -from typing import Any, Match, TypeVar +from typing import Match, TypeVar from .entities import entities -def charCodeAt(src: str, pos: int) -> Any: +def charCodeAt(src: str, pos: int) -> int | None: """ Returns the Unicode value of the character at the specified location. @@ -24,6 +24,21 @@ def charCodeAt(src: str, pos: int) -> Any: return None +def charStrAt(src: str, pos: int) -> str | None: + """ + Returns the Unicode value of the character at the specified location. + + @param - index The zero-based index of the desired character. + If there is no character at the specified index, NaN is returned. 
+ + This was added for compatibility with python + """ + try: + return src[pos] + except IndexError: + return None + + _ItemTV = TypeVar("_ItemTV") @@ -96,7 +111,7 @@ def replaceEntityPattern(match: str, name: str) -> str: if name in entities: return entities[name] - if ord(name[0]) == 0x23 and DIGITAL_ENTITY_TEST_RE.search(name): + if name[0] == "#" and DIGITAL_ENTITY_TEST_RE.search(name): code = int(name[2:], 16) if name[1].lower() == "x" else int(name[1:], 10) if isValidEntityCode(code): return fromCodePoint(code) @@ -178,8 +193,14 @@ def escapeRE(string: str) -> str: # ////////////////////////////////////////////////////////////////////////////// -def isSpace(code: object) -> bool: - return code in {0x09, 0x20} +def isSpace(code: int | None) -> bool: + """Check if character code is a whitespace.""" + return code in (0x09, 0x20) + + +def isStrSpace(ch: str | None) -> bool: + """Check if character is a whitespace.""" + return ch in ("\t", " ") MD_WHITESPACE = { @@ -188,7 +209,7 @@ def isSpace(code: object) -> bool: 0x0B, # \v 0x0C, # \f 0x0D, # \r - 0x20, + 0x20, # space 0xA0, 0x1680, 0x202F, @@ -213,6 +234,7 @@ def isWhiteSpace(code: int) -> bool: # Currently without astral characters support. def isPunctChar(ch: str) -> bool: + """Check if character is a punctuation character.""" return UNICODE_PUNCT_RE.search(ch) is not None diff --git a/markdown_it/helpers/parse_link_destination.py b/markdown_it/helpers/parse_link_destination.py index d527ce0c..f42b2244 100644 --- a/markdown_it/helpers/parse_link_destination.py +++ b/markdown_it/helpers/parse_link_destination.py @@ -49,7 +49,7 @@ def parseLinkDestination(string: str, pos: int, maximum: int) -> _Result: while pos < maximum: code = charCodeAt(string, pos) - if code == 0x20: + if code is None or code == 0x20: break # ascii control characters diff --git a/markdown_it/helpers/parse_link_label.py b/markdown_it/helpers/parse_link_label.py index 6ce8daf8..01c653c5 100644 --- a/markdown_it/helpers/parse_link_label.py +++ b/markdown_it/helpers/parse_link_label.py @@ -17,8 +17,8 @@ def parseLinkLabel(state: StateInline, start: int, disableNested: bool = False) level = 1 while state.pos < state.posMax: - marker = state.srcCharCode[state.pos] - if marker == 0x5D: # /* ] */) + marker = state.src[state.pos] + if marker == "]": level -= 1 if level == 0: found = True @@ -26,7 +26,7 @@ def parseLinkLabel(state: StateInline, start: int, disableNested: bool = False) prevPos = state.pos state.md.inline.skipToken(state) - if marker == 0x5B: # /* [ */) + if marker == "[": if prevPos == state.pos - 1: # increase level if we find text `[`, # which is not a part of any token diff --git a/markdown_it/main.py b/markdown_it/main.py index 243e1509..bb294a99 100644 --- a/markdown_it/main.py +++ b/markdown_it/main.py @@ -46,7 +46,7 @@ def __init__( """ # add modules self.utils = utils - self.helpers: Any = helpers + self.helpers = helpers # initialise classes self.inline = ParserInline() diff --git a/markdown_it/parser_block.py b/markdown_it/parser_block.py index cd240a8a..86f08cf5 100644 --- a/markdown_it/parser_block.py +++ b/markdown_it/parser_block.py @@ -97,16 +97,11 @@ def tokenize( state.line = line def parse( - self, - src: str, - md: MarkdownIt, - env: EnvType, - outTokens: list[Token], - ords: tuple[int, ...] 
| None = None, + self, src: str, md: MarkdownIt, env: EnvType, outTokens: list[Token] ) -> list[Token] | None: """Process input string and push block tokens into `outTokens`.""" if not src: return None - state = StateBlock(src, md, env, outTokens, ords) + state = StateBlock(src, md, env, outTokens) self.tokenize(state, state.line, state.lineMax) return state.tokens diff --git a/markdown_it/port.yaml b/markdown_it/port.yaml index a6718fda..945a19f6 100644 --- a/markdown_it/port.yaml +++ b/markdown_it/port.yaml @@ -23,8 +23,7 @@ to manipulate `Token.attrs`, which have an identical signature to those upstream. - Use python version of `charCodeAt` - | - Reduce use of charCodeAt() by storing char codes in a srcCharCodes attribute for state - objects and sharing those whenever possible + Use `str` units instead of `int`s to represent Unicode codepoints. This provides a significant performance boost - | In markdown_it/rules_block/reference.py, diff --git a/markdown_it/ruler.py b/markdown_it/ruler.py index 8ae32beb..9849561d 100644 --- a/markdown_it/ruler.py +++ b/markdown_it/ruler.py @@ -20,6 +20,7 @@ class Ruler from collections.abc import Callable, Iterable from dataclasses import dataclass, field from typing import TYPE_CHECKING, TypedDict +import warnings from markdown_it._compat import DATACLASS_KWARGS @@ -30,8 +31,6 @@ class Ruler class StateBase: - srcCharCode: tuple[int, ...] # noqa: N815 - def __init__(self, src: str, md: MarkdownIt, env: EnvType): self.src = src self.env = env @@ -44,7 +43,18 @@ def src(self) -> str: @src.setter def src(self, value: str) -> None: self._src = value - self.srcCharCode = tuple(ord(c) for c in self.src) + self._srcCharCode: tuple[int, ...] | None = None + + @property + def srcCharCode(self) -> tuple[int, ...]: + warnings.warn( + "StateBase.srcCharCode is deprecated. Use StateBase.src instead.", + DeprecationWarning, + stacklevel=2, + ) + if self._srcCharCode is None: + self._srcCharCode = tuple(ord(c) for c in self._src) + return self._srcCharCode # The first positional arg is always a subtype of `StateBase`. 
Other diff --git a/markdown_it/rules_block/blockquote.py b/markdown_it/rules_block/blockquote.py index da57dfa5..0c9081b9 100644 --- a/markdown_it/rules_block/blockquote.py +++ b/markdown_it/rules_block/blockquote.py @@ -3,7 +3,7 @@ import logging -from ..common.utils import isSpace +from ..common.utils import isStrSpace from .state_block import StateBlock LOGGER = logging.getLogger(__name__) @@ -23,7 +23,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> # check the block quote marker try: - if state.srcCharCode[pos] != 0x3E: # /* > */ + if state.src[pos] != ">": return False except IndexError: return False @@ -38,12 +38,12 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> initial = offset = state.sCount[startLine] + 1 try: - second_char_code: int | None = state.srcCharCode[pos] + second_char: str | None = state.src[pos] except IndexError: - second_char_code = None + second_char = None # skip one optional space after '>' - if second_char_code == 0x20: # /* space */ + if second_char == " ": # ' > test ' # ^ -- position start of line here: pos += 1 @@ -51,7 +51,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> offset += 1 adjustTab = False spaceAfterMarker = True - elif second_char_code == 0x09: # /* tab */ + elif second_char == "\t": spaceAfterMarker = True if (state.bsCount[startLine] + offset) % 4 == 3: @@ -74,10 +74,10 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> state.bMarks[startLine] = pos while pos < max: - ch = state.srcCharCode[pos] + ch = state.src[pos] - if isSpace(ch): - if ch == 0x09: # / tab / + if isStrSpace(ch): + if ch == "\t": offset += ( 4 - (offset + state.bsCount[startLine] + (1 if adjustTab else 0)) % 4 @@ -147,7 +147,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> # Case 1: line is not inside the blockquote, and this line is empty. break - evaluatesTrue = state.srcCharCode[pos] == 0x3E and not isOutdented # /* > */ + evaluatesTrue = state.src[pos] == ">" and not isOutdented pos += 1 if evaluatesTrue: # This line is inside the blockquote. 
@@ -156,12 +156,12 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> initial = offset = state.sCount[nextLine] + 1 try: - next_char: int | None = state.srcCharCode[pos] + next_char: str | None = state.src[pos] except IndexError: next_char = None # skip one optional space after '>' - if next_char == 0x20: # /* space */ + if next_char == " ": # ' > test ' # ^ -- position start of line here: pos += 1 @@ -169,7 +169,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> offset += 1 adjustTab = False spaceAfterMarker = True - elif next_char == 0x09: # /* tab */ + elif next_char == "\t": spaceAfterMarker = True if (state.bsCount[nextLine] + offset) % 4 == 3: @@ -192,10 +192,10 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> state.bMarks[nextLine] = pos while pos < max: - ch = state.srcCharCode[pos] + ch = state.src[pos] - if isSpace(ch): - if ch == 0x09: + if isStrSpace(ch): + if ch == "\t": offset += ( 4 - ( diff --git a/markdown_it/rules_block/fence.py b/markdown_it/rules_block/fence.py index 2051b96b..263f1b8d 100644 --- a/markdown_it/rules_block/fence.py +++ b/markdown_it/rules_block/fence.py @@ -19,15 +19,14 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool if pos + 3 > maximum: return False - marker = state.srcCharCode[pos] + marker = state.src[pos] - # /* ~ */ /* ` */ - if marker != 0x7E and marker != 0x60: + if marker not in ("~", "`"): return False # scan marker length mem = pos - pos = state.skipChars(pos, marker) + pos = state.skipCharsStr(pos, marker) length = pos - mem @@ -37,8 +36,7 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool markup = state.src[mem:pos] params = state.src[pos:maximum] - # /* ` */ - if marker == 0x60 and chr(marker) in params: + if marker == "`" and marker in params: return False # Since start is found, we can report success here in validation mode @@ -65,7 +63,7 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool break try: - if state.srcCharCode[pos] != marker: + if state.src[pos] != marker: continue except IndexError: break @@ -73,7 +71,7 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool if state.is_code_block(nextLine): continue - pos = state.skipChars(pos, marker) + pos = state.skipCharsStr(pos, marker) # closing code fence must be at least as long as the opening one if pos - mem < length: diff --git a/markdown_it/rules_block/heading.py b/markdown_it/rules_block/heading.py index 90847f9d..850ffb50 100644 --- a/markdown_it/rules_block/heading.py +++ b/markdown_it/rules_block/heading.py @@ -3,7 +3,7 @@ import logging -from ..common.utils import isSpace +from ..common.utils import isStrSpace from .state_block import StateBlock LOGGER = logging.getLogger(__name__) @@ -18,29 +18,27 @@ def heading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bo if state.is_code_block(startLine): return False - ch: int | None = state.srcCharCode[pos] + ch: str | None = state.src[pos] - # /* # */ - if ch != 0x23 or pos >= maximum: + if ch != "#" or pos >= maximum: return False # count heading level level = 1 pos += 1 try: - ch = state.srcCharCode[pos] + ch = state.src[pos] except IndexError: ch = None - # /* # */ - while ch == 0x23 and pos < maximum and level <= 6: + while ch == "#" and pos < maximum and level <= 6: level += 1 pos += 1 try: - ch = state.srcCharCode[pos] + ch = state.src[pos] except IndexError: ch = None - if 
level > 6 or (pos < maximum and not isSpace(ch)): + if level > 6 or (pos < maximum and not isStrSpace(ch)): return False if silent: @@ -49,8 +47,8 @@ def heading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bo # Let's cut tails like ' ### ' from the end of string maximum = state.skipSpacesBack(maximum, pos) - tmp = state.skipCharsBack(maximum, 0x23, pos) # # - if tmp > pos and isSpace(state.srcCharCode[tmp - 1]): + tmp = state.skipCharsStrBack(maximum, "#", pos) + if tmp > pos and isStrSpace(state.src[tmp - 1]): maximum = tmp state.line = startLine + 1 diff --git a/markdown_it/rules_block/hr.py b/markdown_it/rules_block/hr.py index 6e6b907b..16df05f2 100644 --- a/markdown_it/rules_block/hr.py +++ b/markdown_it/rules_block/hr.py @@ -4,7 +4,7 @@ """ import logging -from ..common.utils import isSpace +from ..common.utils import isStrSpace from .state_block import StateBlock LOGGER = logging.getLogger(__name__) @@ -20,22 +20,22 @@ def hr(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool: return False try: - marker = state.srcCharCode[pos] + marker = state.src[pos] except IndexError: return False pos += 1 - # Check hr marker: /* * */ /* - */ /* _ */ - if marker != 0x2A and marker != 0x2D and marker != 0x5F: + # Check hr marker + if marker not in ("*", "-", "_"): return False # markers can be mixed with spaces, but there should be at least 3 of them cnt = 1 while pos < maximum: - ch = state.srcCharCode[pos] + ch = state.src[pos] pos += 1 - if ch != marker and not isSpace(ch): + if ch != marker and not isStrSpace(ch): return False if ch == marker: cnt += 1 @@ -50,6 +50,6 @@ def hr(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool: token = state.push("hr", "hr", 0) token.map = [startLine, state.line] - token.markup = chr(marker) * (cnt + 1) + token.markup = marker * (cnt + 1) return True diff --git a/markdown_it/rules_block/html_block.py b/markdown_it/rules_block/html_block.py index dc3cadb1..3d43f6ee 100644 --- a/markdown_it/rules_block/html_block.py +++ b/markdown_it/rules_block/html_block.py @@ -44,7 +44,7 @@ def html_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> if not state.md.options.get("html", None): return False - if state.srcCharCode[pos] != 0x3C: # /* < */ + if state.src[pos] != "<": return False lineText = state.src[pos:maximum] diff --git a/markdown_it/rules_block/lheading.py b/markdown_it/rules_block/lheading.py index beb56698..fbd50699 100644 --- a/markdown_it/rules_block/lheading.py +++ b/markdown_it/rules_block/lheading.py @@ -35,16 +35,15 @@ def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> b maximum = state.eMarks[nextLine] if pos < maximum: - marker = state.srcCharCode[pos] + marker = state.src[pos] - # /* - */ /* = */ - if marker == 0x2D or marker == 0x3D: - pos = state.skipChars(pos, marker) + if marker in ("-", "="): + pos = state.skipCharsStr(pos, marker) pos = state.skipSpaces(pos) # /* = */ if pos >= maximum: - level = 1 if marker == 0x3D else 2 + level = 1 if marker == "=" else 2 break # quirk for blockquotes, this line should already be checked by that rule @@ -72,7 +71,7 @@ def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> b state.line = nextLine + 1 token = state.push("heading_open", "h" + str(level), 1) - token.markup = chr(marker) + token.markup = marker token.map = [startLine, state.line] token = state.push("inline", "", 0) @@ -81,7 +80,7 @@ def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> b 
token.children = [] token = state.push("heading_close", "h" + str(level), -1) - token.markup = chr(marker) + token.markup = marker state.parentType = oldParentType diff --git a/markdown_it/rules_block/list.py b/markdown_it/rules_block/list.py index f1cb089e..a5c596bb 100644 --- a/markdown_it/rules_block/list.py +++ b/markdown_it/rules_block/list.py @@ -1,7 +1,7 @@ # Lists import logging -from ..common.utils import isSpace +from ..common.utils import isStrSpace from .state_block import StateBlock LOGGER = logging.getLogger(__name__) @@ -14,18 +14,18 @@ def skipBulletListMarker(state: StateBlock, startLine: int) -> int: maximum = state.eMarks[startLine] try: - marker = state.srcCharCode[pos] + marker = state.src[pos] except IndexError: return -1 pos += 1 - # Check bullet /* * */ /* - */ /* + */ - if marker != 0x2A and marker != 0x2D and marker != 0x2B: + + if marker not in ("*", "-", "+"): return -1 if pos < maximum: - ch = state.srcCharCode[pos] + ch = state.src[pos] - if not isSpace(ch): + if not isStrSpace(ch): # " -test " - is not a list item return -1 @@ -43,11 +43,12 @@ def skipOrderedListMarker(state: StateBlock, startLine: int) -> int: if pos + 1 >= maximum: return -1 - ch = state.srcCharCode[pos] + ch = state.src[pos] pos += 1 + ch_ord = ord(ch) # /* 0 */ /* 9 */ - if ch < 0x30 or ch > 0x39: + if ch_ord < 0x30 or ch_ord > 0x39: return -1 while True: @@ -55,11 +56,12 @@ def skipOrderedListMarker(state: StateBlock, startLine: int) -> int: if pos >= maximum: return -1 - ch = state.srcCharCode[pos] + ch = state.src[pos] pos += 1 # /* 0 */ /* 9 */ - if ch >= 0x30 and ch <= 0x39: + ch_ord = ord(ch) + if ch_ord >= 0x30 and ch_ord <= 0x39: # List marker should have no more than 9 digits # (prevents integer overflow in browsers) if pos - start >= 10: @@ -67,16 +69,16 @@ def skipOrderedListMarker(state: StateBlock, startLine: int) -> int: continue - # found valid marker: /* ) */ /* . */ - if ch == 0x29 or ch == 0x2E: + # found valid marker + if ch in (")", "."): break return -1 if pos < maximum: - ch = state.srcCharCode[pos] + ch = state.src[pos] - if not isSpace(ch): + if not isStrSpace(ch): # " 1.test " - is not a list item return -1 @@ -159,7 +161,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> return False # We should terminate list on style change. Remember first one to compare. 
- markerCharCode = state.srcCharCode[posAfterMarker - 1] + markerChar = state.src[posAfterMarker - 1] # For validation mode we can terminate immediately if silent: @@ -177,7 +179,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> token = state.push("bullet_list_open", "ul", 1) token.map = listLines = [startLine, 0] - token.markup = chr(markerCharCode) + token.markup = markerChar # # Iterate list items @@ -201,11 +203,11 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> ) while pos < maximum: - ch = state.srcCharCode[pos] + ch = state.src[pos] - if ch == 0x09: # \t + if ch == "\t": offset += 4 - (offset + state.bsCount[nextLine]) % 4 - elif ch == 0x20: # \s + elif ch == " ": offset += 1 else: break @@ -228,7 +230,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> # Run subparser & write tokens token = state.push("list_item_open", "li", 1) - token.markup = chr(markerCharCode) + token.markup = markerChar token.map = itemLines = [startLine, 0] if isOrdered: token.info = state.src[start : posAfterMarker - 1] @@ -280,7 +282,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> state.tight = oldTight token = state.push("list_item_close", "li", -1) - token.markup = chr(markerCharCode) + token.markup = markerChar nextLine = startLine = state.line itemLines[1] = nextLine @@ -320,7 +322,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> if posAfterMarker < 0: break - if markerCharCode != state.srcCharCode[posAfterMarker - 1]: + if markerChar != state.src[posAfterMarker - 1]: break # Finalize list @@ -329,7 +331,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> else: token = state.push("bullet_list_close", "ul", -1) - token.markup = chr(markerCharCode) + token.markup = markerChar listLines[1] = nextLine state.line = nextLine diff --git a/markdown_it/rules_block/reference.py b/markdown_it/rules_block/reference.py index 92f0918c..b77944b2 100644 --- a/markdown_it/rules_block/reference.py +++ b/markdown_it/rules_block/reference.py @@ -19,17 +19,17 @@ def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) -> if state.is_code_block(startLine): return False - if state.srcCharCode[pos] != 0x5B: # /* [ */ + if state.src[pos] != "[": return False # Simple check to quickly interrupt scan on [link](url) at the start of line. # Can be useful on practice: https:#github.com/markdown-it/markdown-it/issues/54 while pos < maximum: # /* ] */ /* \ */ /* : */ - if state.srcCharCode[pos] == 0x5D and state.srcCharCode[pos - 1] != 0x5C: + if state.src[pos] == "]" and state.src[pos - 1] != "\\": if pos + 1 == maximum: return False - if state.srcCharCode[pos + 1] != 0x3A: + if state.src[pos + 1] != ":": return False break pos += 1 diff --git a/markdown_it/rules_block/state_block.py b/markdown_it/rules_block/state_block.py index ee77f097..96a2f88f 100644 --- a/markdown_it/rules_block/state_block.py +++ b/markdown_it/rules_block/state_block.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING, Literal -from ..common.utils import isSpace +from ..common.utils import isStrSpace from ..ruler import StateBase from ..token import Token from ..utils import EnvType @@ -13,18 +13,9 @@ class StateBlock(StateBase): def __init__( - self, - src: str, - md: MarkdownIt, - env: EnvType, - tokens: list[Token], - srcCharCode: tuple[int, ...] 
| None = None, - ): - if srcCharCode is not None: - self._src = src - self.srcCharCode = srcCharCode - else: - self.src = src + self, src: str, md: MarkdownIt, env: EnvType, tokens: list[Token] + ) -> None: + self.src = src # link to parser instance self.md = md @@ -80,12 +71,12 @@ def __init__( start = pos = indent = offset = 0 length = len(self.src) - for pos, character in enumerate(self.srcCharCode): + for pos, character in enumerate(self.src): if not indent_found: - if isSpace(character): + if isStrSpace(character): indent += 1 - if character == 0x09: + if character == "\t": offset += 4 - offset % 4 else: offset += 1 @@ -93,8 +84,8 @@ def __init__( else: indent_found = True - if character == 0x0A or pos == length - 1: - if character != 0x0A: + if character == "\n" or pos == length - 1: + if character != "\n": pos += 1 self.bMarks.append(start) self.eMarks.append(pos) @@ -157,7 +148,7 @@ def skipEmptyLines(self, from_pos: int) -> int: def skipSpaces(self, pos: int) -> int: """Skip spaces from given position.""" while pos < len(self.src): - if not isSpace(self.srcCharCode[pos]): + if not isStrSpace(self.src[pos]): break pos += 1 return pos @@ -168,20 +159,28 @@ def skipSpacesBack(self, pos: int, minimum: int) -> int: return pos while pos > minimum: pos -= 1 - if not isSpace(self.srcCharCode[pos]): + if not isStrSpace(self.src[pos]): return pos + 1 return pos def skipChars(self, pos: int, code: int) -> int: - """Skip char codes from given position.""" + """Skip character code from given position.""" while pos < len(self.src): if self.srcCharCode[pos] != code: break pos += 1 return pos + def skipCharsStr(self, pos: int, ch: str) -> int: + """Skip character string from given position.""" + while pos < len(self.src): + if self.src[pos] != ch: + break + pos += 1 + return pos + def skipCharsBack(self, pos: int, code: int, minimum: int) -> int: - """Skip char codes reverse from given position - 1.""" + """Skip character code reverse from given position - 1.""" if pos <= minimum: return pos while pos > minimum: @@ -190,6 +189,16 @@ def skipCharsBack(self, pos: int, code: int, minimum: int) -> int: return pos + 1 return pos + def skipCharsStrBack(self, pos: int, ch: str, minimum: int) -> int: + """Skip character string reverse from given position - 1.""" + if pos <= minimum: + return pos + while pos > minimum: + pos -= 1 + if ch != self.src[pos]: + return pos + 1 + return pos + def getLines(self, begin: int, end: int, indent: int, keepLastLF: bool) -> str: """Cut lines range from source.""" line = begin @@ -209,9 +218,9 @@ def getLines(self, begin: int, end: int, indent: int, keepLastLF: bool) -> str: ) while (first < last) and (lineIndent < indent): - ch = self.srcCharCode[first] - if isSpace(ch): - if ch == 0x09: + ch = self.src[first] + if isStrSpace(ch): + if ch == "\t": lineIndent += 4 - (lineIndent + self.bsCount[line]) % 4 else: lineIndent += 1 diff --git a/markdown_it/rules_block/table.py b/markdown_it/rules_block/table.py index 8f7be7f1..4b666c1d 100644 --- a/markdown_it/rules_block/table.py +++ b/markdown_it/rules_block/table.py @@ -3,7 +3,7 @@ import re -from ..common.utils import charCodeAt, isSpace +from ..common.utils import charStrAt, isStrSpace from .state_block import StateBlock headerLineRe = re.compile(r"^:?-+:?$") @@ -25,10 +25,10 @@ def escapedSplit(string: str) -> list[str]: isEscaped = False lastPos = 0 current = "" - ch = charCodeAt(string, pos) + ch = charStrAt(string, pos) while pos < max: - if ch == 0x7C: # /* | */ + if ch == "|": if not isEscaped: # pipe separating 
cells, '|' result.append(current + string[lastPos:pos]) @@ -39,10 +39,10 @@ def escapedSplit(string: str) -> list[str]: current += string[lastPos : pos - 1] lastPos = pos - isEscaped = ch == 0x5C # /* \ */ + isEscaped = ch == "\\" pos += 1 - ch = charCodeAt(string, pos) + ch = charStrAt(string, pos) result.append(current + string[lastPos:]) @@ -71,29 +71,27 @@ def table(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool pos = state.bMarks[nextLine] + state.tShift[nextLine] if pos >= state.eMarks[nextLine]: return False - first_ch = state.srcCharCode[pos] + first_ch = state.src[pos] pos += 1 - if first_ch not in {0x7C, 0x2D, 0x3A}: # not in {"|", "-", ":"} + if first_ch not in ("|", "-", ":"): return False if pos >= state.eMarks[nextLine]: return False - second_ch = state.srcCharCode[pos] + second_ch = state.src[pos] pos += 1 - # not in {"|", "-", ":"} and not space - if second_ch not in {0x7C, 0x2D, 0x3A} and not isSpace(second_ch): + if second_ch not in ("|", "-", ":") and not isStrSpace(second_ch): return False # if first character is '-', then second character must not be a space # (due to parsing ambiguity with list) - if first_ch == 0x2D and isSpace(second_ch): + if first_ch == "-" and isStrSpace(second_ch): return False while pos < state.eMarks[nextLine]: - ch = state.srcCharCode[pos] + ch = state.src[pos] - # /* | */ /* - */ /* : */ - if ch not in {0x7C, 0x2D, 0x3A} and not isSpace(ch): + if ch not in ("|", "-", ":") and not isStrSpace(ch): return False pos += 1 @@ -114,10 +112,9 @@ def table(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool if not headerLineRe.search(t): return False - if charCodeAt(t, len(t) - 1) == 0x3A: # /* : */ - # /* : */ - aligns.append("center" if charCodeAt(t, 0) == 0x3A else "right") - elif charCodeAt(t, 0) == 0x3A: # /* : */ + if charStrAt(t, len(t) - 1) == ":": + aligns.append("center" if charStrAt(t, 0) == ":" else "right") + elif charStrAt(t, 0) == ":": aligns.append("left") else: aligns.append("") diff --git a/markdown_it/rules_core/block.py b/markdown_it/rules_core/block.py index dc756418..a6c3bb8d 100644 --- a/markdown_it/rules_core/block.py +++ b/markdown_it/rules_core/block.py @@ -10,6 +10,4 @@ def block(state: StateCore) -> None: token.children = [] state.tokens.append(token) else: - state.md.block.parse( - state.src, state.md, state.env, state.tokens, state.srcCharCode - ) + state.md.block.parse(state.src, state.md, state.env, state.tokens) diff --git a/markdown_it/rules_core/smartquotes.py b/markdown_it/rules_core/smartquotes.py index b4284493..c98fbd71 100644 --- a/markdown_it/rules_core/smartquotes.py +++ b/markdown_it/rules_core/smartquotes.py @@ -24,9 +24,7 @@ def replaceAt(string: str, index: int, ch: str) -> str: def process_inlines(tokens: list[Token], state: StateCore) -> None: stack: list[dict[str, Any]] = [] - for i in range(len(tokens)): - token = tokens[i] - + for i, token in enumerate(tokens): thisLevel = token.level j = 0 @@ -60,13 +58,12 @@ def process_inlines(tokens: list[Token], state: StateCore) -> None: # Find previous character, # default to space if it's the beginning of the line - lastChar = 0x20 + lastChar: None | int = 0x20 if t.start(0) + lastIndex - 1 >= 0: lastChar = charCodeAt(text, t.start(0) + lastIndex - 1) else: for j in range(i)[::-1]: - # lastChar defaults to 0x20 if tokens[j].type == "softbreak" or tokens[j].type == "hardbreak": break # should skip all tokens except 'text', 'html_inline' or 'code_inline' @@ -78,7 +75,7 @@ def process_inlines(tokens: list[Token], state: 
StateCore) -> None: # Find next character, # default to space if it's the end of the line - nextChar = 0x20 + nextChar: None | int = 0x20 if pos < maximum: nextChar = charCodeAt(text, pos) @@ -94,11 +91,15 @@ def process_inlines(tokens: list[Token], state: StateCore) -> None: nextChar = charCodeAt(tokens[j].content, 0) break - isLastPunctChar = isMdAsciiPunct(lastChar) or isPunctChar(chr(lastChar)) - isNextPunctChar = isMdAsciiPunct(nextChar) or isPunctChar(chr(nextChar)) + isLastPunctChar = lastChar is not None and ( + isMdAsciiPunct(lastChar) or isPunctChar(chr(lastChar)) + ) + isNextPunctChar = nextChar is not None and ( + isMdAsciiPunct(nextChar) or isPunctChar(chr(nextChar)) + ) - isLastWhiteSpace = isWhiteSpace(lastChar) - isNextWhiteSpace = isWhiteSpace(nextChar) + isLastWhiteSpace = lastChar is not None and isWhiteSpace(lastChar) + isNextWhiteSpace = nextChar is not None and isWhiteSpace(nextChar) if isNextWhiteSpace: # noqa: SIM114 canOpen = False @@ -111,7 +112,9 @@ def process_inlines(tokens: list[Token], state: StateCore) -> None: canClose = False if nextChar == 0x22 and t.group(0) == '"': # 0x22: " # noqa: SIM102 - if lastChar >= 0x30 and lastChar <= 0x39: # 0x30: 0, 0x39: 9 + if ( + lastChar is not None and lastChar >= 0x30 and lastChar <= 0x39 + ): # 0x30: 0, 0x39: 9 # special case: 1"" - count first quote as an inch canClose = canOpen = False diff --git a/markdown_it/rules_inline/autolink.py b/markdown_it/rules_inline/autolink.py index 11ac5905..295d963f 100644 --- a/markdown_it/rules_inline/autolink.py +++ b/markdown_it/rules_inline/autolink.py @@ -12,7 +12,7 @@ def autolink(state: StateInline, silent: bool) -> bool: pos = state.pos - if state.srcCharCode[pos] != 0x3C: # /* < */ + if state.src[pos] != "<": return False start = state.pos @@ -23,11 +23,11 @@ def autolink(state: StateInline, silent: bool) -> bool: if pos >= maximum: return False - ch = state.srcCharCode[pos] + ch = state.src[pos] - if ch == 0x3C: # /* < */ + if ch == "<": return False - if ch == 0x3E: # /* > */ + if ch == ">": break url = state.src[start + 1 : pos] diff --git a/markdown_it/rules_inline/backticks.py b/markdown_it/rules_inline/backticks.py index 5f1e0552..fc60d6b1 100644 --- a/markdown_it/rules_inline/backticks.py +++ b/markdown_it/rules_inline/backticks.py @@ -8,10 +8,8 @@ def backtick(state: StateInline, silent: bool) -> bool: pos = state.pos - ch = state.srcCharCode[pos] - # /* ` */ - if ch != 0x60: + if state.src[pos] != "`": return False start = pos @@ -19,7 +17,7 @@ def backtick(state: StateInline, silent: bool) -> bool: maximum = state.posMax # scan marker length - while pos < maximum and (state.srcCharCode[pos] == 0x60): # /* ` */ + while pos < maximum and (state.src[pos] == "`"): pos += 1 marker = state.src[start:pos] @@ -42,7 +40,7 @@ def backtick(state: StateInline, silent: bool) -> bool: matchEnd = matchStart + 1 # scan marker length - while matchEnd < maximum and (state.srcCharCode[matchEnd] == 0x60): # /* ` */ + while matchEnd < maximum and (state.src[matchEnd] == "`"): matchEnd += 1 closerLength = matchEnd - matchStart diff --git a/markdown_it/rules_inline/emphasis.py b/markdown_it/rules_inline/emphasis.py index d21b494c..56b94b6b 100644 --- a/markdown_it/rules_inline/emphasis.py +++ b/markdown_it/rules_inline/emphasis.py @@ -8,23 +8,22 @@ def tokenize(state: StateInline, silent: bool) -> bool: """Insert each marker as a separate text token, and add it to delimiter list""" start = state.pos - marker = state.srcCharCode[start] + marker = state.src[start] if silent: return False - 
# /* _ */ /* * */ - if marker != 0x5F and marker != 0x2A: + if marker not in ("_", "*"): return False - scanned = state.scanDelims(state.pos, marker == 0x2A) + scanned = state.scanDelims(state.pos, marker == "*") for i in range(scanned.length): token = state.push("text", "", 0) - token.content = chr(marker) + token.content = marker state.delimiters.append( Delimiter( - marker=marker, + marker=ord(marker), length=scanned.length, jump=i, token=len(state.tokens) - 1, diff --git a/markdown_it/rules_inline/entity.py b/markdown_it/rules_inline/entity.py index 1e5d0ea0..d3b5f6bb 100644 --- a/markdown_it/rules_inline/entity.py +++ b/markdown_it/rules_inline/entity.py @@ -13,13 +13,11 @@ def entity(state: StateInline, silent: bool) -> bool: pos = state.pos maximum = state.posMax - if state.srcCharCode[pos] != 0x26: # /* & */ + if state.src[pos] != "&": return False if (pos + 1) < maximum: - ch = state.srcCharCode[pos + 1] - - if ch == 0x23: # /* # */ + if state.src[pos + 1] == "#": match = DIGITAL_RE.search(state.src[pos:]) if match: if not silent: diff --git a/markdown_it/rules_inline/escape.py b/markdown_it/rules_inline/escape.py index 1767e01d..8694cec1 100644 --- a/markdown_it/rules_inline/escape.py +++ b/markdown_it/rules_inline/escape.py @@ -1,42 +1,72 @@ """ Process escaped chars and hardbreaks """ -from ..common.utils import isSpace +from ..common.utils import isStrSpace from .state_inline import StateInline -ESCAPED = [0 for _ in range(256)] -for ch in "\\!\"#$%&'()*+,./:;<=>?@[]^_`{|}~-": - ESCAPED[ord(ch)] = 1 +_ESCAPED = { + "!", + '"', + "#", + "$", + "%", + "&", + "'", + "(", + ")", + "*", + "+", + ",", + "-", + ".", + "/", + ":", + ";", + "<", + "=", + ">", + "?", + "@", + "[", + "\\", + "]", + "^", + "_", + "`", + "{", + "|", + "}", + "~", +} def escape(state: StateInline, silent: bool) -> bool: pos = state.pos maximum = state.posMax - # /* \ */ - if state.srcCharCode[pos] != 0x5C: + if state.src[pos] != "\\": return False pos += 1 if pos < maximum: - ch = state.srcCharCode[pos] + ch = state.src[pos] - if ch < 256 and ESCAPED[ch] != 0: + if ch in _ESCAPED: if not silent: state.pending += state.src[pos] state.pos += 2 return True - if ch == 0x0A: + if ch == "\n": if not silent: state.push("hardbreak", "br", 0) pos += 1 # skip leading whitespaces from next line while pos < maximum: - ch = state.srcCharCode[pos] - if not isSpace(ch): + ch = state.src[pos] + if not isStrSpace(ch): break pos += 1 diff --git a/markdown_it/rules_inline/html_inline.py b/markdown_it/rules_inline/html_inline.py index 6a636684..3c8b5331 100644 --- a/markdown_it/rules_inline/html_inline.py +++ b/markdown_it/rules_inline/html_inline.py @@ -17,17 +17,12 @@ def html_inline(state: StateInline, silent: bool) -> bool: # Check start maximum = state.posMax - if state.srcCharCode[pos] != 0x3C or pos + 2 >= maximum: # /* < */ + if state.src[pos] != "<" or pos + 2 >= maximum: return False # Quick fail on second char - ch = state.srcCharCode[pos + 1] - if ( - ch != 0x21 - and ch != 0x3F # /* ! */ - and ch != 0x2F # /* ? 
*/ - and not isLetter(ch) # /* / */ - ): + ch = state.src[pos + 1] + if ch not in ("!", "?", "/") and not isLetter(ord(ch)): # /* / */ return False match = HTML_TAG_RE.search(state.src[pos:]) diff --git a/markdown_it/rules_inline/image.py b/markdown_it/rules_inline/image.py index 0cb14ffd..b4a32a9f 100644 --- a/markdown_it/rules_inline/image.py +++ b/markdown_it/rules_inline/image.py @@ -1,7 +1,7 @@ # Process ![image]( "title") from __future__ import annotations -from ..common.utils import isSpace, normalizeReference +from ..common.utils import isStrSpace, normalizeReference from ..token import Token from .state_inline import StateInline @@ -12,11 +12,10 @@ def image(state: StateInline, silent: bool) -> bool: oldPos = state.pos max = state.posMax - # /* ! */ - if state.srcCharCode[state.pos] != 0x21: + if state.src[state.pos] != "!": return False - # /* [ */ - if state.pos + 1 < state.posMax and state.srcCharCode[state.pos + 1] != 0x5B: + + if state.pos + 1 < state.posMax and state.src[state.pos + 1] != "[": return False labelStart = state.pos + 2 @@ -27,8 +26,8 @@ def image(state: StateInline, silent: bool) -> bool: return False pos = labelEnd + 1 - # /* ( */ - if pos < max and state.srcCharCode[pos] == 0x28: + + if pos < max and state.src[pos] == "(": # # Inline link # @@ -37,8 +36,8 @@ def image(state: StateInline, silent: bool) -> bool: # ^^ skipping these spaces pos += 1 while pos < max: - code = state.srcCharCode[pos] - if not isSpace(code) and code != 0x0A: + ch = state.src[pos] + if not isStrSpace(ch) and ch != "\n": break pos += 1 @@ -60,8 +59,8 @@ def image(state: StateInline, silent: bool) -> bool: # ^^ skipping these spaces start = pos while pos < max: - code = state.srcCharCode[pos] - if not isSpace(code) and code != 0x0A: + ch = state.src[pos] + if not isStrSpace(ch) and ch != "\n": break pos += 1 @@ -75,15 +74,14 @@ def image(state: StateInline, silent: bool) -> bool: # [link]( "title" ) # ^^ skipping these spaces while pos < max: - code = state.srcCharCode[pos] - if not isSpace(code) and code != 0x0A: + ch = state.src[pos] + if not isStrSpace(ch) and ch != "\n": break pos += 1 else: title = "" - # /* ) */ - if pos >= max or state.srcCharCode[pos] != 0x29: + if pos >= max or state.src[pos] != ")": state.pos = oldPos return False @@ -97,7 +95,7 @@ def image(state: StateInline, silent: bool) -> bool: return False # /* [ */ - if pos < max and state.srcCharCode[pos] == 0x5B: + if pos < max and state.src[pos] == "[": start = pos + 1 pos = state.md.helpers.parseLinkLabel(state, pos) if pos >= 0: diff --git a/markdown_it/rules_inline/link.py b/markdown_it/rules_inline/link.py index c4548ccd..18c0736c 100644 --- a/markdown_it/rules_inline/link.py +++ b/markdown_it/rules_inline/link.py @@ -1,6 +1,6 @@ # Process [link]( "stuff") -from ..common.utils import isSpace, normalizeReference +from ..common.utils import isStrSpace, normalizeReference from .state_inline import StateInline @@ -13,7 +13,7 @@ def link(state: StateInline, silent: bool) -> bool: start = state.pos parseReference = True - if state.srcCharCode[state.pos] != 0x5B: # /* [ */ + if state.src[state.pos] != "[": return False labelStart = state.pos + 1 @@ -25,7 +25,7 @@ def link(state: StateInline, silent: bool) -> bool: pos = labelEnd + 1 - if pos < maximum and state.srcCharCode[pos] == 0x28: # /* ( */ + if pos < maximum and state.src[pos] == "(": # # Inline link # @@ -37,8 +37,8 @@ def link(state: StateInline, silent: bool) -> bool: # ^^ skipping these spaces pos += 1 while pos < maximum: - code = state.srcCharCode[pos] - 
if not isSpace(code) and code != 0x0A: + ch = state.src[pos] + if not isStrSpace(ch) and ch != "\n": break pos += 1 @@ -60,8 +60,8 @@ def link(state: StateInline, silent: bool) -> bool: # ^^ skipping these spaces start = pos while pos < maximum: - code = state.srcCharCode[pos] - if not isSpace(code) and code != 0x0A: + ch = state.src[pos] + if not isStrSpace(ch) and ch != "\n": break pos += 1 @@ -75,12 +75,12 @@ def link(state: StateInline, silent: bool) -> bool: # [link]( "title" ) # ^^ skipping these spaces while pos < maximum: - code = state.srcCharCode[pos] - if not isSpace(code) and code != 0x0A: + ch = state.src[pos] + if not isStrSpace(ch) and ch != "\n": break pos += 1 - if pos >= maximum or state.srcCharCode[pos] != 0x29: # /* ) */ + if pos >= maximum or state.src[pos] != ")": # parsing a valid shortcut link failed, fallback to reference parseReference = True @@ -93,7 +93,7 @@ def link(state: StateInline, silent: bool) -> bool: if "references" not in state.env: return False - if pos < maximum and state.srcCharCode[pos] == 0x5B: # /* [ */ + if pos < maximum and state.src[pos] == "[": start = pos + 1 pos = state.md.helpers.parseLinkLabel(state, pos) if pos >= 0: diff --git a/markdown_it/rules_inline/newline.py b/markdown_it/rules_inline/newline.py index 4c440579..dede7251 100644 --- a/markdown_it/rules_inline/newline.py +++ b/markdown_it/rules_inline/newline.py @@ -1,7 +1,7 @@ # Proceess '\n' import re -from ..common.utils import charCodeAt, isSpace +from ..common.utils import charStrAt, isStrSpace from .state_inline import StateInline endSpace = re.compile(r" +$") @@ -10,8 +10,7 @@ def newline(state: StateInline, silent: bool) -> bool: pos = state.pos - # /* \n */ - if state.srcCharCode[pos] != 0x0A: + if state.src[pos] != "\n": return False pmax = len(state.pending) - 1 @@ -22,8 +21,8 @@ def newline(state: StateInline, silent: bool) -> bool: # Pending string is stored in concat mode, indexed lookups will cause # conversion to flat mode. 
if not silent: - if pmax >= 0 and charCodeAt(state.pending, pmax) == 0x20: - if pmax >= 1 and charCodeAt(state.pending, pmax - 1) == 0x20: + if pmax >= 0 and charStrAt(state.pending, pmax) == " ": + if pmax >= 1 and charStrAt(state.pending, pmax - 1) == " ": state.pending = endSpace.sub("", state.pending) state.push("hardbreak", "br", 0) else: @@ -36,7 +35,7 @@ def newline(state: StateInline, silent: bool) -> bool: pos += 1 # skip heading spaces for next line - while pos < maximum and isSpace(state.srcCharCode[pos]): + while pos < maximum and isStrSpace(state.src[pos]): pos += 1 state.pos = pos diff --git a/markdown_it/rules_inline/state_inline.py b/markdown_it/rules_inline/state_inline.py index 12e1d934..ef23f85d 100644 --- a/markdown_it/rules_inline/state_inline.py +++ b/markdown_it/rules_inline/state_inline.py @@ -132,24 +132,24 @@ def scanDelims(self, start: int, canSplitWord: bool) -> Scanned: """ pos = start maximum = self.posMax - marker = self.srcCharCode[start] + marker = self.src[start] # treat beginning of the line as a whitespace - lastChar = self.srcCharCode[start - 1] if start > 0 else 0x20 + lastChar = self.src[start - 1] if start > 0 else " " - while pos < maximum and self.srcCharCode[pos] == marker: + while pos < maximum and self.src[pos] == marker: pos += 1 count = pos - start # treat end of the line as a whitespace - nextChar = self.srcCharCode[pos] if pos < maximum else 0x20 + nextChar = self.src[pos] if pos < maximum else " " - isLastPunctChar = isMdAsciiPunct(lastChar) or isPunctChar(chr(lastChar)) - isNextPunctChar = isMdAsciiPunct(nextChar) or isPunctChar(chr(nextChar)) + isLastPunctChar = isMdAsciiPunct(ord(lastChar)) or isPunctChar(lastChar) + isNextPunctChar = isMdAsciiPunct(ord(nextChar)) or isPunctChar(nextChar) - isLastWhiteSpace = isWhiteSpace(lastChar) - isNextWhiteSpace = isWhiteSpace(nextChar) + isLastWhiteSpace = isWhiteSpace(ord(lastChar)) + isNextWhiteSpace = isWhiteSpace(ord(nextChar)) left_flanking = not ( isNextWhiteSpace diff --git a/markdown_it/rules_inline/strikethrough.py b/markdown_it/rules_inline/strikethrough.py index 8b080816..f671412c 100644 --- a/markdown_it/rules_inline/strikethrough.py +++ b/markdown_it/rules_inline/strikethrough.py @@ -7,17 +7,16 @@ def tokenize(state: StateInline, silent: bool) -> bool: """Insert each marker as a separate text token, and add it to delimiter list""" start = state.pos - marker = state.srcCharCode[start] + ch = state.src[start] if silent: return False - if marker != 0x7E: # /* ~ */ + if ch != "~": return False scanned = state.scanDelims(state.pos, True) length = scanned.length - ch = chr(marker) if length < 2: return False @@ -33,15 +32,13 @@ def tokenize(state: StateInline, silent: bool) -> bool: token.content = ch + ch state.delimiters.append( Delimiter( - **{ - "marker": marker, - "length": 0, # disable "rule of 3" length checks meant for emphasis - "jump": i // 2, # for `~~` 1 marker = 2 characters - "token": len(state.tokens) - 1, - "end": -1, - "open": scanned.can_open, - "close": scanned.can_close, - } + marker=ord(ch), + length=0, # disable "rule of 3" length checks meant for emphasis + jump=i // 2, # for `~~` 1 marker = 2 characters + token=len(state.tokens) - 1, + end=-1, + open=scanned.can_open, + close=scanned.can_close, ) ) diff --git a/markdown_it/rules_inline/text.py b/markdown_it/rules_inline/text.py index bdf55310..f306b2e4 100644 --- a/markdown_it/rules_inline/text.py +++ b/markdown_it/rules_inline/text.py @@ -1,50 +1,45 @@ # Skip text characters for text token, place those to pending 
buffer # and increment current pos -from typing import Any - from .state_inline import StateInline # Rule to skip pure text # '{}$%@~+=:' reserved for extensions -# !, ", #, $, %, &, ', (, ), *, +, ,, -, ., /, :, ;, <, =, >, ?, @, [, \, ], ^, _, `, {, |, }, or ~ - # !!!! Don't confuse with "Markdown ASCII Punctuation" chars # http://spec.commonmark.org/0.15/#ascii-punctuation-character -def isTerminatorChar(ch: int) -> bool: - return ch in { - 0x0A, # /* \n */: - 0x21, # /* ! */: - 0x23, # /* # */: - 0x24, # /* $ */: - 0x25, # /* % */: - 0x26, # /* & */: - 0x2A, # /* * */: - 0x2B, # /* + */: - 0x2D, # /* - */: - 0x3A, # /* : */: - 0x3C, # /* < */: - 0x3D, # /* = */: - 0x3E, # /* > */: - 0x40, # /* @ */: - 0x5B, # /* [ */: - 0x5C, # /* \ */: - 0x5D, # /* ] */: - 0x5E, # /* ^ */: - 0x5F, # /* _ */: - 0x60, # /* ` */: - 0x7B, # /* { */: - 0x7D, # /* } */: - 0x7E, # /* ~ */: - } - - -def text(state: StateInline, silent: bool, **args: Any) -> bool: +_TerminatorChars = { + "\n", + "!", + "#", + "$", + "%", + "&", + "*", + "+", + "-", + ":", + "<", + "=", + ">", + "@", + "[", + "\\", + "]", + "^", + "_", + "`", + "{", + "}", + "~", +} + + +def text(state: StateInline, silent: bool) -> bool: pos = state.pos posMax = state.posMax - while (pos < posMax) and not isTerminatorChar(state.srcCharCode[pos]): + while (pos < posMax) and state.src[pos] not in _TerminatorChars: pos += 1 if pos == state.pos: diff --git a/scripts/profiler.py b/scripts/profiler.py index 414a7727..a593baa1 100644 --- a/scripts/profiler.py +++ b/scripts/profiler.py @@ -9,7 +9,7 @@ from markdown_it import MarkdownIt commonmark_spec = ( - (Path(__file__).parent / "tests" / "test_cmark_spec" / "spec.md") + (Path(__file__).parent.parent / "tests" / "test_cmark_spec" / "spec.md") .read_bytes() .decode() ) From 36a428b280b326bf6bfd98b657be2e408d2a87ab Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Thu, 1 Jun 2023 05:03:53 +0200 Subject: [PATCH 45/97] =?UTF-8?q?=F0=9F=91=8C=20Improve=20performance=20of?= =?UTF-8?q?=20`skipSpaces`/`skipChars`=20(#271)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't compute src length on every iteration --- markdown_it/rules_block/state_block.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/markdown_it/rules_block/state_block.py b/markdown_it/rules_block/state_block.py index 96a2f88f..445ad265 100644 --- a/markdown_it/rules_block/state_block.py +++ b/markdown_it/rules_block/state_block.py @@ -147,8 +147,12 @@ def skipEmptyLines(self, from_pos: int) -> int: def skipSpaces(self, pos: int) -> int: """Skip spaces from given position.""" - while pos < len(self.src): - if not isStrSpace(self.src[pos]): + while True: + try: + current = self.src[pos] + except IndexError: + break + if not isStrSpace(current): break pos += 1 return pos @@ -165,16 +169,24 @@ def skipSpacesBack(self, pos: int, minimum: int) -> int: def skipChars(self, pos: int, code: int) -> int: """Skip character code from given position.""" - while pos < len(self.src): - if self.srcCharCode[pos] != code: + while True: + try: + current = self.srcCharCode[pos] + except IndexError: + break + if current != code: break pos += 1 return pos def skipCharsStr(self, pos: int, ch: str) -> int: """Skip character string from given position.""" - while pos < len(self.src): - if self.src[pos] != ch: + while True: + try: + current = self.src[pos] + except IndexError: + break + if current != ch: break pos += 1 return pos From 4e6dfd5994bc765c3d4d5c308e10831c86a1452f 
Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 2 Jun 2023 09:19:23 +0200 Subject: [PATCH 46/97] =?UTF-8?q?=F0=9F=90=9B=20FIX:=20numeric=20character?= =?UTF-8?q?=20reference=20passing=20(#272)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix issue with incorrect determination of a numeric character reference, and subsequent failure to convert to an integer code. From https://github.com/google/oss-fuzz/tree/master/projects/markdown-it-py, fixes issue 55371 This also essentially fixes a bug in upstream, see https://github.com/markdown-it/markdown-it/issues/935 --- markdown_it/common/utils.py | 74 +++++++------------------ tests/test_fuzzer.py | 14 ++--- tests/test_port/fixtures/issue-fixes.md | 9 +++ 3 files changed, 36 insertions(+), 61 deletions(-) diff --git a/markdown_it/common/utils.py b/markdown_it/common/utils.py index 4effc00f..6bf9a36f 100644 --- a/markdown_it/common/utils.py +++ b/markdown_it/common/utils.py @@ -2,7 +2,6 @@ """ from __future__ import annotations -import html import re from typing import Match, TypeVar @@ -52,9 +51,6 @@ def arrayReplaceAt( return src[:pos] + newElements + src[pos + 1 :] -###################################################################### - - def isValidEntityCode(c: int) -> bool: # broken sequence if c >= 0xD800 and c <= 0xDFFF: @@ -89,47 +85,33 @@ def fromCodePoint(c: int) -> str: return chr(c) -UNESCAPE_MD_RE = re.compile(r'\\([!"#$%&\'()*+,\-.\/:;<=>?@[\\\]^_`{|}~])') +# UNESCAPE_MD_RE = re.compile(r'\\([!"#$%&\'()*+,\-.\/:;<=>?@[\\\]^_`{|}~])') # ENTITY_RE_g = re.compile(r'&([a-z#][a-z0-9]{1,31})', re.IGNORECASE) UNESCAPE_ALL_RE = re.compile( r'\\([!"#$%&\'()*+,\-.\/:;<=>?@[\\\]^_`{|}~])' + "|" + r"&([a-z#][a-z0-9]{1,31});", re.IGNORECASE, ) -DIGITAL_ENTITY_TEST_RE = re.compile(r"^#((?:x[a-f0-9]{1,8}|[0-9]{1,8}))", re.IGNORECASE) +DIGITAL_ENTITY_BASE10_RE = re.compile(r"#([0-9]{1,8})") +DIGITAL_ENTITY_BASE16_RE = re.compile(r"#x([a-f0-9]{1,8})", re.IGNORECASE) def replaceEntityPattern(match: str, name: str) -> str: - """Convert HTML entity patterns - - :: - - https://www.google.com -> https%3A//www.google.com - + """Convert HTML entity patterns, + see https://spec.commonmark.org/0.30/#entity-references """ - code = 0 - if name in entities: return entities[name] - if name[0] == "#" and DIGITAL_ENTITY_TEST_RE.search(name): - code = int(name[2:], 16) if name[1].lower() == "x" else int(name[1:], 10) - if isValidEntityCode(code): - return fromCodePoint(code) - - return match - - -# def replaceEntities(string): -# if (string.indexOf('&') < 0): -# return string -# return string.replace(ENTITY_RE, replaceEntityPattern) + code: None | int = None + if pat := DIGITAL_ENTITY_BASE10_RE.fullmatch(name): + code = int(pat.group(1), 10) + elif pat := DIGITAL_ENTITY_BASE16_RE.fullmatch(name): + code = int(pat.group(1), 16) + if code is not None and isValidEntityCode(code): + return fromCodePoint(code) -def unescapeMd(string: str) -> str: - raise NotImplementedError - # if "\\" in string: - # return string - # return string.replace(UNESCAPE_MD_RE, "$1") + return match def unescapeAll(string: str) -> str: @@ -154,30 +136,14 @@ def stripEscape(string: str) -> str: return ESCAPE_CHAR.sub(r"\1", string) -# ////////////////////////////////////////////////////////////////////////////// - -# TODO This section changed quite a lot, should re-check - -# UNESCAPE_HTML_RE = re.compile(r"\\&(?=(amp\;|lt\;|gt\;|quot\;))") -# ESCAPE_AND_HTML = re.compile(r"&(?!(amp\;|lt\;|gt\;|quot\;))") -# HTML_ESCAPE_REPLACE_RE = 
re.compile(r'[&<>"]') - - -# def escapeHtml(string: str): - -# if HTML_ESCAPE_REPLACE_RE.search(string): - -# string = UNESCAPE_HTML_RE.sub("&", string) -# string = ESCAPE_AND_HTML.sub("&", string) -# for k, v in {"<": "<", ">": ">", '"': """}.items(): -# string = string.replace(k, v) - -# return string - - def escapeHtml(raw: str) -> str: - # return html.escape(html.unescape(raw)).replace("'", "'") - return html.escape(raw).replace("'", "'") + """Replace special characters "&", "<", ">" and '"' to HTML-safe sequences.""" + # like html.escape, but without escaping single quotes + raw = raw.replace("&", "&") # Must be done first! + raw = raw.replace("<", "<") + raw = raw.replace(">", ">") + raw = raw.replace('"', """) + return raw # ////////////////////////////////////////////////////////////////////////////// diff --git a/tests/test_fuzzer.py b/tests/test_fuzzer.py index 60cdddaa..f3666cc5 100644 --- a/tests/test_fuzzer.py +++ b/tests/test_fuzzer.py @@ -10,15 +10,15 @@ from markdown_it import MarkdownIt TESTS = { - 55363: ">```\n>", - 55367: ">-\n>\n>", - # 55371: "[](so»0;!" TODO this did not fail - # 55401: "?c_" * 100_000 TODO this did not fail + 55363: (">```\n>", "
    \n
    \n
    \n"), + 55367: (">-\n>\n>", "
    \n
      \n
    • \n
    \n
    \n"), + 55371: ("[](soH0;!", "

    [](so&#4H0;!

    \n"), + # 55401: (("?c_" * 100000) + "c_", ""), TODO this does not fail, just takes a long time } -@pytest.mark.parametrize("raw_input", TESTS.values(), ids=TESTS.keys()) -def test_fuzzing(raw_input): +@pytest.mark.parametrize("raw_input,expected", TESTS.values(), ids=TESTS.keys()) +def test_fuzzing(raw_input, expected): md = MarkdownIt() md.parse(raw_input) - print(md.render(raw_input)) + assert md.render(raw_input) == expected diff --git a/tests/test_port/fixtures/issue-fixes.md b/tests/test_port/fixtures/issue-fixes.md index 319945af..b630fcee 100644 --- a/tests/test_port/fixtures/issue-fixes.md +++ b/tests/test_port/fixtures/issue-fixes.md @@ -45,3 +45,12 @@ Fix CVE-2023-26303

    . + +Fix parsing of incorrect numeric character references +. +[]("y;) "y; +[](#y;) #y; +. +

    &#X22y; + &#35y;

    +. From eb96da144db03319bacdda9dc765d8cd66f50dd3 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 2 Jun 2023 13:34:19 +0200 Subject: [PATCH 47/97] =?UTF-8?q?=F0=9F=91=8C=20Improve=20nested=20emphasi?= =?UTF-8?q?s=20parsing=20(#273)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes quadratic complexity in e.g. `**<...>**a**<...>**` Implementation of upstream commit: https://github.com/markdown-it/markdown-it/commit/24abaa51a605c2de14da59893797733921f09bb8 --- markdown_it/port.yaml | 6 ++-- markdown_it/rules_inline/balance_pairs.py | 43 +++++++++++++++++------ markdown_it/rules_inline/emphasis.py | 7 ++-- markdown_it/rules_inline/state_inline.py | 8 ----- markdown_it/rules_inline/strikethrough.py | 1 - 5 files changed, 39 insertions(+), 26 deletions(-) diff --git a/markdown_it/port.yaml b/markdown_it/port.yaml index 945a19f6..679d13b7 100644 --- a/markdown_it/port.yaml +++ b/markdown_it/port.yaml @@ -1,7 +1,7 @@ - package: markdown-it/markdown-it - version: 12.2.0 - commit: 6e2de08a0b03d3d0dcc524b89710ce05f83a0283 - date: Aug 2, 2021 + version: 12.3.0 + commit: 2e31d3430187d2eee1ba120c954783eebb93b4e8 + date: Dec 9, 2021 notes: - Rename variables that use python built-in names, e.g. - `max` -> `maximum` diff --git a/markdown_it/rules_inline/balance_pairs.py b/markdown_it/rules_inline/balance_pairs.py index 6125de71..bbb2101c 100644 --- a/markdown_it/rules_inline/balance_pairs.py +++ b/markdown_it/rules_inline/balance_pairs.py @@ -6,13 +6,33 @@ def processDelimiters(state: StateInline, delimiters: list[Delimiter]) -> None: """For each opening emphasis-like marker find a matching closing one.""" + if not delimiters: + return + openersBottom = {} maximum = len(delimiters) + # headerIdx is the first delimiter of the current (where closer is) delimiter run + headerIdx = 0 + lastTokenIdx = -2 # needs any value lower than -1 + jumps: list[int] = [] closerIdx = 0 while closerIdx < maximum: closer = delimiters[closerIdx] + jumps.append(0) + + # markers belong to same delimiter run if: + # - they have adjacent tokens + # - AND markers are the same + # + if ( + delimiters[headerIdx].marker != closer.marker + or lastTokenIdx != closer.token - 1 + ): + headerIdx = closerIdx + lastTokenIdx = closer.token + # Length is only used for emphasis-specific "rule of 3", # if it's not defined (in strikethrough or 3rd party plugins), # we can default it to 0 to disable those checks. @@ -34,12 +54,7 @@ def processDelimiters(state: StateInline, delimiters: list[Delimiter]) -> None: (3 if closer.open else 0) + (closer.length % 3) ] - openerIdx = closerIdx - closer.jump - 1 - - # avoid crash if `closer.jump` is pointing outside of the array, - # e.g. for strikethrough - if openerIdx < -1: - openerIdx = -1 + openerIdx = headerIdx - jumps[headerIdx] - 1 newMinOpenerIdx = openerIdx @@ -47,7 +62,7 @@ def processDelimiters(state: StateInline, delimiters: list[Delimiter]) -> None: opener = delimiters[openerIdx] if opener.marker != closer.marker: - openerIdx -= opener.jump + 1 + openerIdx -= jumps[openerIdx] + 1 continue if opener.open and opener.end < 0: @@ -73,19 +88,25 @@ def processDelimiters(state: StateInline, delimiters: list[Delimiter]) -> None: # sure algorithm has linear complexity (see *_*_*_*_*_... case). 
# if openerIdx > 0 and not delimiters[openerIdx - 1].open: - lastJump = delimiters[openerIdx - 1].jump + 1 + lastJump = jumps[openerIdx - 1] + 1 else: lastJump = 0 - closer.jump = closerIdx - openerIdx + lastJump + jumps[closerIdx] = closerIdx - openerIdx + lastJump + jumps[openerIdx] = lastJump + closer.open = False opener.end = closerIdx - opener.jump = lastJump opener.close = False newMinOpenerIdx = -1 + + # treat next token as start of run, + # it optimizes skips in **<...>**a**<...>** pathological case + lastTokenIdx = -2 + break - openerIdx -= opener.jump + 1 + openerIdx -= jumps[openerIdx] + 1 if newMinOpenerIdx != -1: # If match for this delimiter run failed, we want to set lower bound for diff --git a/markdown_it/rules_inline/emphasis.py b/markdown_it/rules_inline/emphasis.py index 56b94b6b..9a98f9e2 100644 --- a/markdown_it/rules_inline/emphasis.py +++ b/markdown_it/rules_inline/emphasis.py @@ -18,14 +18,13 @@ def tokenize(state: StateInline, silent: bool) -> bool: scanned = state.scanDelims(state.pos, marker == "*") - for i in range(scanned.length): + for _ in range(scanned.length): token = state.push("text", "", 0) token.content = marker state.delimiters.append( Delimiter( marker=ord(marker), length=scanned.length, - jump=i, token=len(state.tokens) - 1, end=-1, open=scanned.can_open, @@ -63,9 +62,11 @@ def _postProcess(state: StateInline, delimiters: list[Delimiter]) -> None: isStrong = ( i > 0 and delimiters[i - 1].end == startDelim.end + 1 + # check that first two markers match and adjacent + and delimiters[i - 1].marker == startDelim.marker and delimiters[i - 1].token == startDelim.token - 1 + # check that last two markers are adjacent (we can safely assume they match) and delimiters[startDelim.end + 1].token == endDelim.token + 1 - and delimiters[i - 1].marker == startDelim.marker ) ch = chr(startDelim.marker) diff --git a/markdown_it/rules_inline/state_inline.py b/markdown_it/rules_inline/state_inline.py index ef23f85d..143ab33e 100644 --- a/markdown_it/rules_inline/state_inline.py +++ b/markdown_it/rules_inline/state_inline.py @@ -22,14 +22,6 @@ class Delimiter: # Total length of these series of delimiters. length: int - # An amount of characters before this one that's equivalent to - # current one. In plain English: if this delimiter does not open - # an emphasis, neither do previous `jump` characters. - # - # Used to skip sequences like "*****" in one step, for 1st asterisk - # value will be 0, for 2nd it's 1 and so on. - jump: int - # A position of the token this delimiter corresponds to. 
token: int diff --git a/markdown_it/rules_inline/strikethrough.py b/markdown_it/rules_inline/strikethrough.py index f671412c..ec816281 100644 --- a/markdown_it/rules_inline/strikethrough.py +++ b/markdown_it/rules_inline/strikethrough.py @@ -34,7 +34,6 @@ def tokenize(state: StateInline, silent: bool) -> bool: Delimiter( marker=ord(ch), length=0, # disable "rule of 3" length checks meant for emphasis - jump=i // 2, # for `~~` 1 marker = 2 characters token=len(state.tokens) - 1, end=-1, open=scanned.can_open, From 84dcefe28712c2488e4f1a1c1a03afe5222a70c0 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 2 Jun 2023 13:43:54 +0200 Subject: [PATCH 48/97] =?UTF-8?q?=F0=9F=90=9B=20Fix:=20tab=20preventing=20?= =?UTF-8?q?paragraph=20continuation=20in=20lists=20(#274)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements upstream: https://github.com/markdown-it/markdown-it/commit/1cd8a5143b22967a7583bba19678900efdf72adf --- markdown_it/port.yaml | 6 +++--- markdown_it/rules_block/list.py | 2 +- tests/test_port/fixtures/commonmark_extras.md | 21 +++++++++++++++++++ 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/markdown_it/port.yaml b/markdown_it/port.yaml index 679d13b7..c0a4d70b 100644 --- a/markdown_it/port.yaml +++ b/markdown_it/port.yaml @@ -1,7 +1,7 @@ - package: markdown-it/markdown-it - version: 12.3.0 - commit: 2e31d3430187d2eee1ba120c954783eebb93b4e8 - date: Dec 9, 2021 + version: 12.3.1 + commit: 76469e83dc1a1e3ed943b483b554003a666bddf7 + date: Jan 7, 2022 notes: - Rename variables that use python built-in names, e.g. - `max` -> `maximum` diff --git a/markdown_it/rules_block/list.py b/markdown_it/rules_block/list.py index a5c596bb..d8070d74 100644 --- a/markdown_it/rules_block/list.py +++ b/markdown_it/rules_block/list.py @@ -130,7 +130,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> if ( silent and state.parentType == "paragraph" - and state.tShift[startLine] >= state.blkIndent + and state.sCount[startLine] >= state.blkIndent ): isTerminatingParagraph = True diff --git a/tests/test_port/fixtures/commonmark_extras.md b/tests/test_port/fixtures/commonmark_extras.md index 168b039d..5d13d859 100644 --- a/tests/test_port/fixtures/commonmark_extras.md +++ b/tests/test_port/fixtures/commonmark_extras.md @@ -166,6 +166,27 @@ Regression test (code block + regular paragraph) . +Regression test (tabs in lists, #830) +. +1. asd + 2. asd + +--- + +1. asd + 2. asd +. +
      +
    1. asd +2. asd
    2. +
    +
    +
      +
    1. asd +2. asd
    2. +
    +. + Blockquotes inside indented lists should terminate correctly . - a From 500e69e6d0a49a6970cb277802772c0317bf9f2a Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 2 Jun 2023 14:03:52 +0200 Subject: [PATCH 49/97] =?UTF-8?q?=F0=9F=91=8C=20fix=20possible=20ReDOS=20i?= =?UTF-8?q?n=20newline=20rule=20(#275)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements upstream: https://github.com/markdown-it/markdown-it/commit/ffc49ab46b5b751cd2be0aabb146f2ef84986101 --- markdown_it/port.yaml | 6 +++--- markdown_it/rules_inline/newline.py | 13 +++++++------ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/markdown_it/port.yaml b/markdown_it/port.yaml index c0a4d70b..a553fe1a 100644 --- a/markdown_it/port.yaml +++ b/markdown_it/port.yaml @@ -1,7 +1,7 @@ - package: markdown-it/markdown-it - version: 12.3.1 - commit: 76469e83dc1a1e3ed943b483b554003a666bddf7 - date: Jan 7, 2022 + version: 12.3.2 + commit: d72c68b520cedacae7878caa92bf7fe32e3e0e6f + date: Jan 8, 2022 notes: - Rename variables that use python built-in names, e.g. - `max` -> `maximum` diff --git a/markdown_it/rules_inline/newline.py b/markdown_it/rules_inline/newline.py index dede7251..ca8f1db0 100644 --- a/markdown_it/rules_inline/newline.py +++ b/markdown_it/rules_inline/newline.py @@ -1,11 +1,7 @@ -# Proceess '\n' -import re - +"""Proceess '\n'.""" from ..common.utils import charStrAt, isStrSpace from .state_inline import StateInline -endSpace = re.compile(r" +$") - def newline(state: StateInline, silent: bool) -> bool: pos = state.pos @@ -23,7 +19,12 @@ def newline(state: StateInline, silent: bool) -> bool: if not silent: if pmax >= 0 and charStrAt(state.pending, pmax) == " ": if pmax >= 1 and charStrAt(state.pending, pmax - 1) == " ": - state.pending = endSpace.sub("", state.pending) + # Find whitespaces tail of pending chars. 
+ ws = pmax - 1 + while ws >= 1 and charStrAt(state.pending, ws - 1) == " ": + ws -= 1 + state.pending = state.pending[:ws] + state.push("hardbreak", "br", 0) else: state.pending = state.pending[:-1] From ba96f34dc14c0d8cd274f1c9d9e56f2187707710 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 2 Jun 2023 15:18:38 +0200 Subject: [PATCH 50/97] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Refactor=20backslash?= =?UTF-8?q?=20escape=20logic=20(#276)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- markdown_it/parser_core.py | 11 ++- markdown_it/parser_inline.py | 9 +- markdown_it/presets/commonmark.py | 4 +- markdown_it/presets/zero.py | 7 +- markdown_it/rules_core/__init__.py | 2 + markdown_it/rules_core/text_join.py | 34 +++++++ markdown_it/rules_inline/__init__.py | 4 +- markdown_it/rules_inline/escape.py | 91 +++++++++++-------- .../{text_collapse.py => fragments_join.py} | 2 +- tests/test_api/test_main.py | 35 +++---- tests/test_port/fixtures/linkify.md | 9 +- tests/test_port/fixtures/smartquotes.md | 15 ++- tests/test_port/fixtures/typographer.md | 17 ++++ 13 files changed, 173 insertions(+), 67 deletions(-) create mode 100644 markdown_it/rules_core/text_join.py rename markdown_it/rules_inline/{text_collapse.py => fragments_join.py} (96%) diff --git a/markdown_it/parser_core.py b/markdown_it/parser_core.py index 251b7634..b3eb8abe 100644 --- a/markdown_it/parser_core.py +++ b/markdown_it/parser_core.py @@ -7,7 +7,15 @@ from __future__ import annotations from .ruler import RuleFunc, Ruler -from .rules_core import block, inline, linkify, normalize, replace, smartquotes +from .rules_core import ( + block, + inline, + linkify, + normalize, + replace, + smartquotes, + text_join, +) from .rules_core.state_core import StateCore _rules: list[tuple[str, RuleFunc]] = [ @@ -17,6 +25,7 @@ ("linkify", linkify), ("replacements", replace), ("smartquotes", smartquotes), + ("text_join", text_join), ] diff --git a/markdown_it/parser_inline.py b/markdown_it/parser_inline.py index a8228524..88140d3d 100644 --- a/markdown_it/parser_inline.py +++ b/markdown_it/parser_inline.py @@ -28,11 +28,18 @@ ("entity", rules_inline.entity), ] +# Note `rule2` ruleset was created specifically for emphasis/strikethrough +# post-processing and may be changed in the future. +# +# Don't use this for anything except pairs (plugins working with `balance_pairs`). 
+# _rules2: list[tuple[str, RuleFunc]] = [ ("balance_pairs", rules_inline.link_pairs), ("strikethrough", rules_inline.strikethrough.postProcess), ("emphasis", rules_inline.emphasis.postProcess), - ("text_collapse", rules_inline.text_collapse), + # rules for pairs separate '**' into its own text tokens, which may be left unused, + # rule below merges unused segments back with the rest of the text + ("fragments_join", rules_inline.fragments_join), ] diff --git a/markdown_it/presets/commonmark.py b/markdown_it/presets/commonmark.py index 60a39250..3990d434 100644 --- a/markdown_it/presets/commonmark.py +++ b/markdown_it/presets/commonmark.py @@ -40,7 +40,7 @@ def make() -> PresetType: "highlight": None, }, "components": { - "core": {"rules": ["normalize", "block", "inline"]}, + "core": {"rules": ["normalize", "block", "inline", "text_join"]}, "block": { "rules": [ "blockquote", @@ -68,7 +68,7 @@ def make() -> PresetType: "newline", "text", ], - "rules2": ["balance_pairs", "emphasis", "text_collapse"], + "rules2": ["balance_pairs", "emphasis", "fragments_join"], }, }, } diff --git a/markdown_it/presets/zero.py b/markdown_it/presets/zero.py index fcc5eb3a..2f69a58d 100644 --- a/markdown_it/presets/zero.py +++ b/markdown_it/presets/zero.py @@ -33,8 +33,11 @@ def make() -> PresetType: "highlight": None, }, "components": { - "core": {"rules": ["normalize", "block", "inline"]}, + "core": {"rules": ["normalize", "block", "inline", "text_join"]}, "block": {"rules": ["paragraph"]}, - "inline": {"rules": ["text"], "rules2": ["balance_pairs", "text_collapse"]}, + "inline": { + "rules": ["text"], + "rules2": ["balance_pairs", "fragments_join"], + }, }, } diff --git a/markdown_it/rules_core/__init__.py b/markdown_it/rules_core/__init__.py index f80034c5..c9c5368c 100644 --- a/markdown_it/rules_core/__init__.py +++ b/markdown_it/rules_core/__init__.py @@ -6,6 +6,7 @@ "replace", "smartquotes", "linkify", + "text_join", ) from .block import block @@ -15,3 +16,4 @@ from .replacements import replace from .smartquotes import smartquotes from .state_core import StateCore +from .text_join import text_join diff --git a/markdown_it/rules_core/text_join.py b/markdown_it/rules_core/text_join.py new file mode 100644 index 00000000..d54ccbbc --- /dev/null +++ b/markdown_it/rules_core/text_join.py @@ -0,0 +1,34 @@ +"""Join raw text tokens with the rest of the text + +This is set as a separate rule to provide an opportunity for plugins +to run text replacements after text join, but before escape join. + +For example, `\\:)` shouldn't be replaced with an emoji. 
+""" +from __future__ import annotations + +from ..token import Token +from .state_core import StateCore + + +def text_join(state: StateCore) -> None: + """Join raw text for escape sequences (`text_special`) tokens with the rest of the text""" + + for inline_token in state.tokens[:]: + if inline_token.type != "inline": + continue + + # convert text_special to text and join all adjacent text nodes + new_tokens: list[Token] = [] + for child_token in inline_token.children or []: + if child_token.type == "text_special": + child_token.type = "text" + if ( + child_token.type == "text" + and new_tokens + and new_tokens[-1].type == "text" + ): + new_tokens[-1].content += child_token.content + else: + new_tokens.append(child_token) + inline_token.children = new_tokens diff --git a/markdown_it/rules_inline/__init__.py b/markdown_it/rules_inline/__init__.py index f27907ce..dde97d34 100644 --- a/markdown_it/rules_inline/__init__.py +++ b/markdown_it/rules_inline/__init__.py @@ -1,7 +1,7 @@ __all__ = ( "StateInline", "text", - "text_collapse", + "fragments_join", "link_pairs", "escape", "newline", @@ -20,10 +20,10 @@ from .balance_pairs import link_pairs from .entity import entity from .escape import escape +from .fragments_join import fragments_join from .html_inline import html_inline from .image import image from .link import link from .newline import newline from .state_inline import StateInline from .text import text -from .text_collapse import text_collapse diff --git a/markdown_it/rules_inline/escape.py b/markdown_it/rules_inline/escape.py index 8694cec1..9f68b5df 100644 --- a/markdown_it/rules_inline/escape.py +++ b/markdown_it/rules_inline/escape.py @@ -4,6 +4,58 @@ from ..common.utils import isStrSpace from .state_inline import StateInline + +def escape(state: StateInline, silent: bool) -> bool: + """Process escaped chars and hardbreaks.""" + pos = state.pos + maximum = state.posMax + + if state.src[pos] != "\\": + return False + + pos += 1 + + # '\' at the end of the inline block + if pos >= maximum: + return False + + ch1 = state.src[pos] + ch1_ord = ord(ch1) + if ch1 == "\n": + if not silent: + state.push("hardbreak", "br", 0) + pos += 1 + # skip leading whitespaces from next line + while pos < maximum: + ch = state.src[pos] + if not isStrSpace(ch): + break + pos += 1 + + state.pos = pos + return True + + escapedStr = state.src[pos] + + if ch1_ord >= 0xD800 and ch1_ord <= 0xDBFF and pos + 1 < maximum: + ch2 = state.src[pos + 1] + ch2_ord = ord(ch2) + if ch2_ord >= 0xDC00 and ch2_ord <= 0xDFFF: + escapedStr += ch2 + pos += 1 + + origStr = "\\" + escapedStr + + if not silent: + token = state.push("text_special", "", 0) + token.content = escapedStr if ch1 in _ESCAPED else origStr + token.markup = origStr + token.info = "escape" + + state.pos = pos + 1 + return True + + _ESCAPED = { "!", '"', @@ -38,42 +90,3 @@ "}", "~", } - - -def escape(state: StateInline, silent: bool) -> bool: - pos = state.pos - maximum = state.posMax - - if state.src[pos] != "\\": - return False - - pos += 1 - - if pos < maximum: - ch = state.src[pos] - - if ch in _ESCAPED: - if not silent: - state.pending += state.src[pos] - state.pos += 2 - return True - - if ch == "\n": - if not silent: - state.push("hardbreak", "br", 0) - - pos += 1 - # skip leading whitespaces from next line - while pos < maximum: - ch = state.src[pos] - if not isStrSpace(ch): - break - pos += 1 - - state.pos = pos - return True - - if not silent: - state.pending += "\\" - state.pos += 1 - return True diff --git 
a/markdown_it/rules_inline/text_collapse.py b/markdown_it/rules_inline/fragments_join.py similarity index 96% rename from markdown_it/rules_inline/text_collapse.py rename to markdown_it/rules_inline/fragments_join.py index e09289cf..f795c136 100644 --- a/markdown_it/rules_inline/text_collapse.py +++ b/markdown_it/rules_inline/fragments_join.py @@ -1,7 +1,7 @@ from .state_inline import StateInline -def text_collapse(state: StateInline) -> None: +def fragments_join(state: StateInline) -> None: """ Clean up tokens after emphasis and strikethrough postprocessing: merge adjacent text nodes into one and re-calculate all token levels diff --git a/tests/test_api/test_main.py b/tests/test_api/test_main.py index c3a9ac8b..64a2bbe8 100644 --- a/tests/test_api/test_main.py +++ b/tests/test_api/test_main.py @@ -13,6 +13,7 @@ def test_get_rules(): "linkify", "replacements", "smartquotes", + "text_join", ], "block": [ "table", @@ -40,7 +41,7 @@ def test_get_rules(): "html_inline", "entity", ], - "inline2": ["balance_pairs", "strikethrough", "emphasis", "text_collapse"], + "inline2": ["balance_pairs", "strikethrough", "emphasis", "fragments_join"], } @@ -48,13 +49,13 @@ def test_load_presets(): md = MarkdownIt("zero") assert md.get_active_rules() == { "block": ["paragraph"], - "core": ["normalize", "block", "inline"], + "core": ["normalize", "block", "inline", "text_join"], "inline": ["text"], - "inline2": ["balance_pairs", "text_collapse"], + "inline2": ["balance_pairs", "fragments_join"], } md = MarkdownIt("commonmark") assert md.get_active_rules() == { - "core": ["normalize", "block", "inline"], + "core": ["normalize", "block", "inline", "text_join"], "block": [ "code", "fence", @@ -79,7 +80,7 @@ def test_load_presets(): "html_inline", "entity", ], - "inline2": ["balance_pairs", "emphasis", "text_collapse"], + "inline2": ["balance_pairs", "emphasis", "fragments_join"], } @@ -94,16 +95,16 @@ def test_enable(): md = MarkdownIt("zero").enable("heading") assert md.get_active_rules() == { "block": ["heading", "paragraph"], - "core": ["normalize", "block", "inline"], + "core": ["normalize", "block", "inline", "text_join"], "inline": ["text"], - "inline2": ["balance_pairs", "text_collapse"], + "inline2": ["balance_pairs", "fragments_join"], } md.enable(["backticks", "autolink"]) assert md.get_active_rules() == { "block": ["heading", "paragraph"], - "core": ["normalize", "block", "inline"], + "core": ["normalize", "block", "inline", "text_join"], "inline": ["text", "backticks", "autolink"], - "inline2": ["balance_pairs", "text_collapse"], + "inline2": ["balance_pairs", "fragments_join"], } @@ -111,16 +112,16 @@ def test_disable(): md = MarkdownIt("zero").disable("inline") assert md.get_active_rules() == { "block": ["paragraph"], - "core": ["normalize", "block"], + "core": ["normalize", "block", "text_join"], "inline": ["text"], - "inline2": ["balance_pairs", "text_collapse"], + "inline2": ["balance_pairs", "fragments_join"], } md.disable(["text"]) assert md.get_active_rules() == { "block": ["paragraph"], - "core": ["normalize", "block"], + "core": ["normalize", "block", "text_join"], "inline": [], - "inline2": ["balance_pairs", "text_collapse"], + "inline2": ["balance_pairs", "fragments_join"], } @@ -130,15 +131,15 @@ def test_reset(): md.disable("inline") assert md.get_active_rules() == { "block": ["paragraph"], - "core": ["normalize", "block"], + "core": ["normalize", "block", "text_join"], "inline": ["text"], - "inline2": ["balance_pairs", "text_collapse"], + "inline2": ["balance_pairs", "fragments_join"], 
} assert md.get_active_rules() == { "block": ["paragraph"], - "core": ["normalize", "block", "inline"], + "core": ["normalize", "block", "inline", "text_join"], "inline": ["text"], - "inline2": ["balance_pairs", "text_collapse"], + "inline2": ["balance_pairs", "fragments_join"], } diff --git a/tests/test_port/fixtures/linkify.md b/tests/test_port/fixtures/linkify.md index 9edb78f3..c9755c03 100644 --- a/tests/test_port/fixtures/linkify.md +++ b/tests/test_port/fixtures/linkify.md @@ -96,4 +96,11 @@ after
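The expected rule lists above include the new `text_join` core rule and the renamed `fragments_join` inline rule. A small illustrative sketch of what `text_join` does with backslash escapes, using only the public `MarkdownIt` API (the commented values are the expected results, not captured output):

```python
from markdown_it import MarkdownIt

# With the full core chain, the escape rule emits a `text_special` token for
# "\(" and the final `text_join` rule merges it back into ordinary text.
inline = MarkdownIt("commonmark").parse("foo \\(bar")[1]
print([t.type for t in inline.children])  # expected: ['text']
print(inline.children[0].content)         # expected: 'foo (bar'

# Disabling `text_join` leaves the intermediate tokens visible.
md = MarkdownIt("commonmark").disable("text_join")
inline = md.parse("foo \\(bar")[1]
print([t.type for t in inline.children])
# expected: ['text', 'text_special', 'text']
```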

    before

    github.com

    after

    -. \ No newline at end of file +. + +Don't match escaped +. +google\.com +. +
+<p>google.com</p>
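A minimal sketch of the behaviour this fixture pins down, via the Python API (assumes the optional linkify-it-py dependency is installed; commented HTML is the expected output):

```python
from markdown_it import MarkdownIt

md = MarkdownIt("commonmark", {"linkify": True}).enable(["linkify"])

# The escaped dot suppresses the fuzzy link...
print(md.render("google\\.com"))  # expected: <p>google.com</p>

# ...while the unescaped form is still linkified.
print(md.render("google.com"))
# expected: <p><a href="http://google.com">google.com</a></p>
```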
    +. diff --git a/tests/test_port/fixtures/smartquotes.md b/tests/test_port/fixtures/smartquotes.md index 70378b8e..e77175aa 100644 --- a/tests/test_port/fixtures/smartquotes.md +++ b/tests/test_port/fixtures/smartquotes.md @@ -163,4 +163,17 @@ Should parse quotes adjacent to inline html, #677: .

    “test


    test”

    -. \ No newline at end of file +. + +Should be escapable: +. +"foo" + +\"foo" + +"foo\" +. +
+<p>“foo”</p>
+<p>&quot;foo&quot;</p>
+<p>&quot;foo&quot;</p>
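The same escaping rules can be sketched through the Python API (the commonmark preset ships with the typographer off, so it is enabled explicitly; commented HTML is the expected output):

```python
from markdown_it import MarkdownIt

md = MarkdownIt("commonmark", {"typographer": True}).enable(["smartquotes", "replacements"])

print(md.render('"foo"'))    # expected: <p>“foo”</p>
print(md.render('\\"foo"'))  # expected: <p>&quot;foo&quot;</p> (escaped quote stays straight)
```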
    +. diff --git a/tests/test_port/fixtures/typographer.md b/tests/test_port/fixtures/typographer.md index 39154ed0..59e48941 100644 --- a/tests/test_port/fixtures/typographer.md +++ b/tests/test_port/fixtures/typographer.md @@ -81,6 +81,13 @@ dupes-ellipsis

    !.. ?.. ,… !!!.. ???.. ,…

    . +copyright should be escapable +. +\(c) +. +
+<p>(c)</p>
    +. + dashes . @@ -101,6 +108,16 @@ markdownit--awesome

    markdownit–awesome

    . +dashes should be escapable +. +foo \-- bar + +foo -\- bar +. +
+<p>foo -- bar</p>
+<p>foo -- bar</p>
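A companion sketch for the typographer escapes above, using the same setup as the smartquotes example (commented HTML is the expected output):

```python
from markdown_it import MarkdownIt

md = MarkdownIt("commonmark", {"typographer": True}).enable(["smartquotes", "replacements"])

print(md.render("(c) -- dash"))      # expected: <p>© – dash</p>
print(md.render("\\(c) \\-- dash"))  # expected: <p>(c) -- dash</p> (escapes disable the replacements)
```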
    +. + regression tests for #624 . 1---2---3 From ea27cc86ca52c7ca1876fd1c550a518ecb61ecbe Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 2 Jun 2023 20:02:17 +0200 Subject: [PATCH 51/97] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Refactor:=20Add=20li?= =?UTF-8?q?nkifier=20rule=20to=20inline=20chain=20for=20full=20links=20(#2?= =?UTF-8?q?79)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes collision of emphasis and linkifier (so `http://example.org/foo._bar_-_baz` is now a single link, not emphasized). Emails and fuzzy links are not affected by this. Implements upstream: https://github.com/markdown-it/markdown-it/commit/6b58ec4245abe2e293c79bd7daabf4543ef46399 --- markdown_it/common/utils.py | 12 +++ markdown_it/parser_inline.py | 1 + markdown_it/presets/__init__.py | 2 +- markdown_it/rules_core/linkify.py | 66 ++++++++------ markdown_it/rules_inline/__init__.py | 2 + markdown_it/rules_inline/html_inline.py | 6 ++ markdown_it/rules_inline/link.py | 2 + markdown_it/rules_inline/linkify.py | 61 +++++++++++++ markdown_it/rules_inline/state_inline.py | 4 + tests/test_api/test_main.py | 1 + tests/test_port/fixtures/linkify.md | 107 +++++++++++++++++++++++ 11 files changed, 234 insertions(+), 30 deletions(-) create mode 100644 markdown_it/rules_inline/linkify.py diff --git a/markdown_it/common/utils.py b/markdown_it/common/utils.py index 6bf9a36f..0d11e3e3 100644 --- a/markdown_it/common/utils.py +++ b/markdown_it/common/utils.py @@ -304,3 +304,15 @@ def normalizeReference(string: str) -> str: # most notably, `__proto__`) # return string.lower().upper() + + +LINK_OPEN_RE = re.compile(r"^\s]", flags=re.IGNORECASE) +LINK_CLOSE_RE = re.compile(r"^", flags=re.IGNORECASE) + + +def isLinkOpen(string: str) -> bool: + return bool(LINK_OPEN_RE.search(string)) + + +def isLinkClose(string: str) -> bool: + return bool(LINK_CLOSE_RE.search(string)) diff --git a/markdown_it/parser_inline.py b/markdown_it/parser_inline.py index 88140d3d..febe4e6e 100644 --- a/markdown_it/parser_inline.py +++ b/markdown_it/parser_inline.py @@ -16,6 +16,7 @@ # Parser rules _rules: list[tuple[str, RuleFunc]] = [ ("text", rules_inline.text), + ("linkify", rules_inline.linkify), ("newline", rules_inline.newline), ("escape", rules_inline.escape), ("backticks", rules_inline.backtick), diff --git a/markdown_it/presets/__init__.py b/markdown_it/presets/__init__.py index f1cb0507..1e6796a2 100644 --- a/markdown_it/presets/__init__.py +++ b/markdown_it/presets/__init__.py @@ -21,7 +21,7 @@ def make() -> PresetType: config = commonmark.make() config["components"]["core"]["rules"].append("linkify") config["components"]["block"]["rules"].append("table") - config["components"]["inline"]["rules"].append("strikethrough") + config["components"]["inline"]["rules"].extend(["strikethrough", "linkify"]) config["components"]["inline"]["rules2"].append("strikethrough") config["options"]["linkify"] = True config["options"]["html"] = True diff --git a/markdown_it/rules_core/linkify.py b/markdown_it/rules_core/linkify.py index 49bb4ef3..efbc9d4c 100644 --- a/markdown_it/rules_core/linkify.py +++ b/markdown_it/rules_core/linkify.py @@ -1,41 +1,32 @@ +from __future__ import annotations + import re +from typing import Protocol -from ..common.utils import arrayReplaceAt +from ..common.utils import arrayReplaceAt, isLinkClose, isLinkOpen from ..token import Token from .state_core import StateCore -LINK_OPEN_RE = re.compile(r"^\s]", flags=re.IGNORECASE) -LINK_CLOSE_RE = re.compile(r"^", flags=re.IGNORECASE) - 
HTTP_RE = re.compile(r"^http://") MAILTO_RE = re.compile(r"^mailto:") TEST_MAILTO_RE = re.compile(r"^mailto:", flags=re.IGNORECASE) -def isLinkOpen(string: str) -> bool: - return bool(LINK_OPEN_RE.search(string)) - - -def isLinkClose(string: str) -> bool: - return bool(LINK_CLOSE_RE.search(string)) - - def linkify(state: StateCore) -> None: - blockTokens = state.tokens - + """Rule for identifying plain-text links.""" if not state.md.options.linkify: return if not state.md.linkify: raise ModuleNotFoundError("Linkify enabled but not installed.") - for j in range(len(blockTokens)): - if blockTokens[j].type != "inline" or not state.md.linkify.pretest( - blockTokens[j].content + for inline_token in state.tokens: + if inline_token.type != "inline" or not state.md.linkify.pretest( + inline_token.content ): continue - tokens = blockTokens[j].children + tokens = inline_token.children htmlLinkLevel = 0 @@ -71,38 +62,47 @@ def linkify(state: StateCore) -> None: currentToken.content ): text = currentToken.content - links = state.md.linkify.match(text) + links: list[_LinkType] = state.md.linkify.match(text) or [] # Now split string to nodes nodes = [] level = currentToken.level lastPos = 0 - for ln in range(len(links)): - url = links[ln].url + # forbid escape sequence at the start of the string, + # this avoids http\://example.com/ from being linkified as + # http://example.com/ + if ( + links + and links[0].index == 0 + and i > 0 + and tokens[i - 1].type == "text_special" + ): + links = links[1:] + + for link in links: + url = link.url fullUrl = state.md.normalizeLink(url) if not state.md.validateLink(fullUrl): continue - urlText = links[ln].text + urlText = link.text # Linkifier might send raw hostnames like "example.com", where url # starts with domain name. So we prepend http:// in those cases, # and remove it afterwards. 
- if not links[ln].schema: + if not link.schema: urlText = HTTP_RE.sub( "", state.md.normalizeLinkText("http://" + urlText) ) - elif links[ln].schema == "mailto:" and TEST_MAILTO_RE.search( - urlText - ): + elif link.schema == "mailto:" and TEST_MAILTO_RE.search(urlText): urlText = MAILTO_RE.sub( "", state.md.normalizeLinkText("mailto:" + urlText) ) else: urlText = state.md.normalizeLinkText(urlText) - pos = links[ln].index + pos = link.index if pos > lastPos: token = Token("text", "", 0) @@ -130,7 +130,7 @@ def linkify(state: StateCore) -> None: token.info = "auto" nodes.append(token) - lastPos = links[ln].last_index + lastPos = link.last_index if lastPos < len(text): token = Token("text", "", 0) @@ -138,4 +138,12 @@ def linkify(state: StateCore) -> None: token.level = level nodes.append(token) - blockTokens[j].children = tokens = arrayReplaceAt(tokens, i, nodes) + inline_token.children = tokens = arrayReplaceAt(tokens, i, nodes) + + +class _LinkType(Protocol): + url: str + text: str + index: int + last_index: int + schema: str | None diff --git a/markdown_it/rules_inline/__init__.py b/markdown_it/rules_inline/__init__.py index dde97d34..3a8026ec 100644 --- a/markdown_it/rules_inline/__init__.py +++ b/markdown_it/rules_inline/__init__.py @@ -3,6 +3,7 @@ "text", "fragments_join", "link_pairs", + "linkify", "escape", "newline", "backtick", @@ -24,6 +25,7 @@ from .html_inline import html_inline from .image import image from .link import link +from .linkify import linkify from .newline import newline from .state_inline import StateInline from .text import text diff --git a/markdown_it/rules_inline/html_inline.py b/markdown_it/rules_inline/html_inline.py index 3c8b5331..9065e1d0 100644 --- a/markdown_it/rules_inline/html_inline.py +++ b/markdown_it/rules_inline/html_inline.py @@ -1,5 +1,6 @@ # Process html tags from ..common.html_re import HTML_TAG_RE +from ..common.utils import isLinkClose, isLinkOpen from .state_inline import StateInline @@ -33,5 +34,10 @@ def html_inline(state: StateInline, silent: bool) -> bool: token = state.push("html_inline", "", 0) token.content = state.src[pos : pos + len(match.group(0))] + if isLinkOpen(token.content): + state.linkLevel += 1 + if isLinkClose(token.content): + state.linkLevel -= 1 + state.pos += len(match.group(0)) return True diff --git a/markdown_it/rules_inline/link.py b/markdown_it/rules_inline/link.py index 18c0736c..78cf9122 100644 --- a/markdown_it/rules_inline/link.py +++ b/markdown_it/rules_inline/link.py @@ -140,7 +140,9 @@ def link(state: StateInline, silent: bool) -> bool: if label and state.md.options.get("store_labels", False): token.meta["label"] = label + state.linkLevel += 1 state.md.inline.tokenize(state) + state.linkLevel -= 1 token = state.push("link_close", "a", -1) diff --git a/markdown_it/rules_inline/linkify.py b/markdown_it/rules_inline/linkify.py new file mode 100644 index 00000000..a8a18153 --- /dev/null +++ b/markdown_it/rules_inline/linkify.py @@ -0,0 +1,61 @@ +"""Process links like https://example.org/""" +import re + +from .state_inline import StateInline + +# RFC3986: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." 
) +SCHEME_RE = re.compile(r"(?:^|[^a-z0-9.+-])([a-z][a-z0-9.+-]*)$", re.IGNORECASE) + + +def linkify(state: StateInline, silent: bool) -> bool: + """Rule for identifying plain-text links.""" + if not state.md.options.linkify: + return False + if state.linkLevel > 0: + return False + if not state.md.linkify: + raise ModuleNotFoundError("Linkify enabled but not installed.") + + pos = state.pos + maximum = state.posMax + + if ( + (pos + 3) > maximum + or state.src[pos] != ":" + or state.src[pos + 1] != "/" + or state.src[pos + 2] != "/" + ): + return False + + if not (match := SCHEME_RE.match(state.pending)): + return False + + proto = match.group(1) + if not (link := state.md.linkify.match_at_start(state.src[pos - len(proto) :])): + return False + url: str = link.url + + # disallow '*' at the end of the link (conflicts with emphasis) + url = url.rstrip("*") + + full_url = state.md.normalizeLink(url) + if not state.md.validateLink(full_url): + return False + + if not silent: + state.pending = state.pending[: -len(proto)] + + token = state.push("link_open", "a", 1) + token.attrs = {"href": full_url} + token.markup = "linkify" + token.info = "auto" + + token = state.push("text", "", 0) + token.content = state.md.normalizeLinkText(url) + + token = state.push("link_close", "a", -1) + token.markup = "linkify" + token.info = "auto" + + state.pos += len(url) - len(proto) + return True diff --git a/markdown_it/rules_inline/state_inline.py b/markdown_it/rules_inline/state_inline.py index 143ab33e..c0c491c4 100644 --- a/markdown_it/rules_inline/state_inline.py +++ b/markdown_it/rules_inline/state_inline.py @@ -70,6 +70,10 @@ def __init__( self.backticks: dict[int, int] = {} self.backticksScanned = False + # Counter used to disable inline linkify-it execution + # inside and markdown links + self.linkLevel = 0 + def __repr__(self) -> str: return ( f"{self.__class__.__name__}" diff --git a/tests/test_api/test_main.py b/tests/test_api/test_main.py index 64a2bbe8..178d717e 100644 --- a/tests/test_api/test_main.py +++ b/tests/test_api/test_main.py @@ -30,6 +30,7 @@ def test_get_rules(): ], "inline": [ "text", + "linkify", "newline", "escape", "backticks", diff --git a/tests/test_port/fixtures/linkify.md b/tests/test_port/fixtures/linkify.md index c9755c03..f51bb6b9 100644 --- a/tests/test_port/fixtures/linkify.md +++ b/tests/test_port/fixtures/linkify.md @@ -29,6 +29,84 @@ don't touch text in html tags

    https://example.com

    . +entities inside raw links +. +https://example.com/foo&bar +. +
+<p><a href="https://example.com/foo&amp;bar">https://example.com/foo&amp;bar</a></p>
    +. + + +emphasis inside raw links (asterisk, can happen in links with params) +. +https://example.com/foo*bar*baz +. +
+<p><a href="https://example.com/foo*bar*baz">https://example.com/foo*bar*baz</a></p>
    +. + + +emphasis inside raw links (underscore) +. +http://example.org/foo._bar_-_baz +. +
+<p><a href="http://example.org/foo._bar_-_baz">http://example.org/foo._bar_-_baz</a></p>
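This is the example called out in the commit message: the new inline `linkify` rule now claims the whole URL before emphasis post-processing can split it on the underscores. A minimal sketch via the Python API (assumes linkify-it-py is installed; commented HTML is the expected output):

```python
from markdown_it import MarkdownIt

md = MarkdownIt("commonmark", {"linkify": True}).enable(["linkify"])
print(md.render("http://example.org/foo._bar_-_baz"))
# expected: <p><a href="http://example.org/foo._bar_-_baz">http://example.org/foo._bar_-_baz</a></p>
```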
    +. + + +backticks inside raw links +. +https://example.com/foo`bar`baz +. +

    https://example.com/foo`bar`baz

    +. + + +links inside raw links +. +https://example.com/foo[123](456)bar +. +

    https://example.com/foo[123](456)bar

    +. + + +escapes not allowed at the start +. +\https://example.com +. +

    \https://example.com

    +. + + +escapes not allowed at comma +. +https\://example.com +. +

    https://example.com

    +. + + +escapes not allowed at slashes +. +https:\//aa.org https://bb.org +. +

    https://aa.org https://bb.org

    +. + + +fuzzy link shouldn't match cc.org +. +https:/\/cc.org +. +

    https://cc.org

    +. + + +bold links (exclude markup of pairs from link tail) +. +**http://example.com/foobar** +. +
+<p><strong><a href="http://example.com/foobar">http://example.com/foobar</a></strong></p>
    +. match links without protocol . @@ -37,6 +115,35 @@ www.example.org

    www.example.org

    . +coverage, prefix not valid +. +http:/example.com/ +. +

    http:/example.com/

    +. + + +coverage, negative link level +. +[https://example.com](https://example.com) +. +
+<p><a href="https://example.com">https://example.com</a></p>
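A sketch of the `linkLevel` guard this fixture covers: URL text inside an explicit Markdown link is not linkified a second time (assumes linkify-it-py is installed; commented HTML is the expected output):

```python
from markdown_it import MarkdownIt

md = MarkdownIt("commonmark", {"linkify": True}).enable(["linkify"])
print(md.render("[https://example.com](https://example.com)"))
# expected: <p><a href="https://example.com">https://example.com</a></p>
```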
    +. + + +emphasis with '*', real link: +. +http://cdecl.ridiculousfish.com/?q=int+%28*f%29+%28float+*%29%3B +. +

    http://cdecl.ridiculousfish.com/?q=int+(*f)+(float+*)%3B

    +. + +emphasis with '_', real link: +. +https://www.sell.fi/sites/default/files/elainlaakarilehti/tieteelliset_artikkelit/kahkonen_t._et_al.canine_pancreatitis-_review.pdf +. +

    https://www.sell.fi/sites/default/files/elainlaakarilehti/tieteelliset_artikkelit/kahkonen_t._et_al.canine_pancreatitis-_review.pdf

    +. emails . From d5b3874539f50faf435ecb81355546cd8a63dd40 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 2 Jun 2023 20:46:32 +0200 Subject: [PATCH 52/97] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Parse=20entities=20t?= =?UTF-8?q?o=20`text=5Fspecial`=20token=20(#280)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather than adding directly to text. The `text_join` core rule then joins it to the text later, but after typographic rules have been applied. Implements upstream: https://github.com/markdown-it/markdown-it/commita/3fc0deb38b5a8b2eb8f46c727cc4e299e5ae5f9c --- markdown_it/rules_inline/entity.py | 67 +++++++++++++------------ tests/test_port/fixtures/smartquotes.md | 13 +++++ tests/test_port/fixtures/typographer.md | 7 +++ 3 files changed, 55 insertions(+), 32 deletions(-) diff --git a/markdown_it/rules_inline/entity.py b/markdown_it/rules_inline/entity.py index d3b5f6bb..ec9d3965 100644 --- a/markdown_it/rules_inline/entity.py +++ b/markdown_it/rules_inline/entity.py @@ -16,35 +16,38 @@ def entity(state: StateInline, silent: bool) -> bool: if state.src[pos] != "&": return False - if (pos + 1) < maximum: - if state.src[pos + 1] == "#": - match = DIGITAL_RE.search(state.src[pos:]) - if match: - if not silent: - match1 = match.group(1) - code = ( - int(match1[1:], 16) - if match1[0].lower() == "x" - else int(match1, 10) - ) - state.pending += ( - fromCodePoint(code) - if isValidEntityCode(code) - else fromCodePoint(0xFFFD) - ) - - state.pos += len(match.group(0)) - return True - - else: - match = NAMED_RE.search(state.src[pos:]) - if match and match.group(1) in entities: - if not silent: - state.pending += entities[match.group(1)] - state.pos += len(match.group(0)) - return True - - if not silent: - state.pending += "&" - state.pos += 1 - return True + if pos + 1 >= maximum: + return False + + if state.src[pos + 1] == "#": + if match := DIGITAL_RE.search(state.src[pos:]): + if not silent: + match1 = match.group(1) + code = ( + int(match1[1:], 16) if match1[0].lower() == "x" else int(match1, 10) + ) + + token = state.push("text_special", "", 0) + token.content = ( + fromCodePoint(code) + if isValidEntityCode(code) + else fromCodePoint(0xFFFD) + ) + token.markup = match.group(0) + token.info = "entity" + + state.pos += len(match.group(0)) + return True + + else: + if (match := NAMED_RE.search(state.src[pos:])) and match.group(1) in entities: + if not silent: + token = state.push("text_special", "", 0) + token.content = entities[match.group(1)] + token.markup = match.group(0) + token.info = "entity" + + state.pos += len(match.group(0)) + return True + + return False diff --git a/tests/test_port/fixtures/smartquotes.md b/tests/test_port/fixtures/smartquotes.md index e77175aa..8ed314e2 100644 --- a/tests/test_port/fixtures/smartquotes.md +++ b/tests/test_port/fixtures/smartquotes.md @@ -177,3 +177,16 @@ Should be escapable:

    "foo"

    "foo"

    . + +Should not replace entities: +. +"foo" + +"foo" + +"foo" +. +
+<p>&quot;foo&quot;</p>
+<p>&quot;foo&quot;</p>
+<p>&quot;foo&quot;</p>
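At the token level this works because entities are now parsed into `text_special` tokens, which the smartquotes and replacements rules skip; a small sketch with `text_join` disabled makes the intermediate tokens visible (commented values are the expected results):

```python
from markdown_it import MarkdownIt

md = MarkdownIt("commonmark").disable("text_join")
inline = md.parse("&copy; 2023")[1]
print([(t.type, t.content) for t in inline.children])
# expected: [('text_special', '©'), ('text', ' 2023')]
```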
    +. diff --git a/tests/test_port/fixtures/typographer.md b/tests/test_port/fixtures/typographer.md index 59e48941..d72a7c2f 100644 --- a/tests/test_port/fixtures/typographer.md +++ b/tests/test_port/fixtures/typographer.md @@ -130,3 +130,10 @@ regression tests for #624

    1–2–3

    1 – – 3

    . + +shouldn't replace entities +. +(c) (c) (c) +. +
+<p>(c) (c) ©</p>
    +. From dd51f6222ed5e93ecfcf4514f67c71be99430011 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 2 Jun 2023 21:18:21 +0200 Subject: [PATCH 53/97] =?UTF-8?q?=E2=80=BC=EF=B8=8F=20Remove=20(p)=20=3D>?= =?UTF-8?q?=20=C2=A7=20replacement=20in=20typographer=20(#281)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `(p)` is no longer replaced with § by typographer (conflicts with ℗) Implements upstream: https://github.com/markdown-it/markdown-it/commit/f52351499be1e6c838110c31e07154cce1d91d47 --- markdown_it/port.yaml | 6 +++--- markdown_it/rules_core/replacements.py | 9 ++++----- tests/test_port/fixtures/typographer.md | 9 --------- 3 files changed, 7 insertions(+), 17 deletions(-) diff --git a/markdown_it/port.yaml b/markdown_it/port.yaml index a553fe1a..3e289e9e 100644 --- a/markdown_it/port.yaml +++ b/markdown_it/port.yaml @@ -1,7 +1,7 @@ - package: markdown-it/markdown-it - version: 12.3.2 - commit: d72c68b520cedacae7878caa92bf7fe32e3e0e6f - date: Jan 8, 2022 + version: 13.0.1 + commit: e843acc9edad115cbf8cf85e676443f01658be08 + date: May 3, 2022 notes: - Rename variables that use python built-in names, e.g. - `max` -> `maximum` diff --git a/markdown_it/rules_core/replacements.py b/markdown_it/rules_core/replacements.py index 0b6e86af..14912e17 100644 --- a/markdown_it/rules_core/replacements.py +++ b/markdown_it/rules_core/replacements.py @@ -3,7 +3,6 @@ * ``(c)``, ``(C)`` → © * ``(tm)``, ``(TM)`` → ™ * ``(r)``, ``(R)`` → ® -* ``(p)``, ``(P)`` → § * ``+-`` → ± * ``...`` → … * ``?....`` → ?.. @@ -26,15 +25,15 @@ # TODO: # - fractionals 1/2, 1/4, 3/4 -> ½, ¼, ¾ -# - miltiplication 2 x 4 -> 2 × 4 +# - multiplication 2 x 4 -> 2 × 4 RARE_RE = re.compile(r"\+-|\.\.|\?\?\?\?|!!!!|,,|--") # Workaround for phantomjs - need regex without /g flag, # or root check will fail every second time -# SCOPED_ABBR_TEST_RE = r"\((c|tm|r|p)\)" +# SCOPED_ABBR_TEST_RE = r"\((c|tm|r)\)" -SCOPED_ABBR_RE = re.compile(r"\((c|tm|r|p)\)", flags=re.IGNORECASE) +SCOPED_ABBR_RE = re.compile(r"\((c|tm|r)\)", flags=re.IGNORECASE) PLUS_MINUS_RE = re.compile(r"\+-") @@ -53,7 +52,7 @@ EN_DASH_INDENT_RE = re.compile(r"(^|[^-\s])--(?=[^-\s]|$)", flags=re.MULTILINE) -SCOPED_ABBR = {"c": "©", "r": "®", "p": "§", "tm": "™"} +SCOPED_ABBR = {"c": "©", "r": "®", "tm": "™"} def replaceFn(match: re.Match[str]) -> str: diff --git a/tests/test_port/fixtures/typographer.md b/tests/test_port/fixtures/typographer.md index d72a7c2f..23825e5d 100644 --- a/tests/test_port/fixtures/typographer.md +++ b/tests/test_port/fixtures/typographer.md @@ -41,15 +41,6 @@ trademark

    ™ ™

    . - -paragraph -. -(p) (P) -. -

    § §

    -. - - plus-minus . +-5 From 64965cfdac910a30a6a886ab3e5e710fa2649360 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 2 Jun 2023 21:27:40 +0200 Subject: [PATCH 54/97] =?UTF-8?q?=F0=9F=91=8C=20Show=20text=20of=20`text?= =?UTF-8?q?=5Fspecial`=20in=20`tree.pretty`=20(#282)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provides a demonstration of how the new `text_special` token parsing works --- markdown_it/tree.py | 7 ++++++- tests/test_tree.py | 7 +++++++ tests/test_tree/test_pretty_text_special.xml | 11 +++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 tests/test_tree/test_pretty_text_special.xml diff --git a/markdown_it/tree.py b/markdown_it/tree.py index a39ba32a..6641e5a4 100644 --- a/markdown_it/tree.py +++ b/markdown_it/tree.py @@ -229,7 +229,12 @@ def pretty( if not self.is_root and self.attrs: text += " " + " ".join(f"{k}={v!r}" for k, v in self.attrs.items()) text += ">" - if show_text and not self.is_root and self.type == "text" and self.content: + if ( + show_text + and not self.is_root + and self.type in ("text", "text_special") + and self.content + ): text += "\n" + textwrap.indent(self.content, prefix + " " * indent) for child in self.children: text += "\n" + child.pretty( diff --git a/tests/test_tree.py b/tests/test_tree.py index c5203b0b..36bd0b67 100644 --- a/tests/test_tree.py +++ b/tests/test_tree.py @@ -76,6 +76,13 @@ def test_pretty(file_regression): file_regression.check(node.pretty(indent=2, show_text=True), extension=".xml") +def test_pretty_text_special(file_regression): + md = MarkdownIt() + md.disable("text_join") + tree = SyntaxTreeNode(md.parse("foo © bar \\(")) + file_regression.check(tree.pretty(show_text=True), extension=".xml") + + def test_walk(): tokens = MarkdownIt().parse(EXAMPLE_MARKDOWN) tree = SyntaxTreeNode(tokens) diff --git a/tests/test_tree/test_pretty_text_special.xml b/tests/test_tree/test_pretty_text_special.xml new file mode 100644 index 00000000..211d790c --- /dev/null +++ b/tests/test_tree/test_pretty_text_special.xml @@ -0,0 +1,11 @@ + + + + + foo + + © + + bar + + ( \ No newline at end of file From 90b367d9c340cb6db1cf3e0ebf96127ddc72497f Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 2 Jun 2023 22:47:54 +0200 Subject: [PATCH 55/97] =?UTF-8?q?=F0=9F=94=A7=20Add=20typing=20of=20rule?= =?UTF-8?q?=20functions=20(#283)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rule functions signature is specific to the state it acts on. --- docs/conf.py | 1 - markdown_it/parser_block.py | 12 +++++-- markdown_it/parser_core.py | 10 ++++-- markdown_it/parser_inline.py | 19 +++++++---- markdown_it/ruler.py | 45 +++++++++++++------------- markdown_it/rules_block/lheading.py | 3 +- markdown_it/rules_block/paragraph.py | 3 +- tests/test_api/test_plugin_creation.py | 20 ++++++------ 8 files changed, 65 insertions(+), 48 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 2b48df1e..4fa12262 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -53,7 +53,6 @@ ".*Literal.*", ".*_Result", "EnvType", - "RuleFunc", "Path", "Ellipsis", ) diff --git a/markdown_it/parser_block.py b/markdown_it/parser_block.py index 86f08cf5..32749127 100644 --- a/markdown_it/parser_block.py +++ b/markdown_it/parser_block.py @@ -2,7 +2,7 @@ from __future__ import annotations import logging -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Callable from . 
import rules_block from .ruler import Ruler @@ -16,7 +16,13 @@ LOGGER = logging.getLogger(__name__) -_rules: list[tuple[str, Any, list[str]]] = [ +RuleFuncBlockType = Callable[[StateBlock, int, int, bool], bool] +"""(state: StateBlock, startLine: int, endLine: int, silent: bool) -> matched: bool) + +`silent` disables token generation, useful for lookahead. +""" + +_rules: list[tuple[str, RuleFuncBlockType, list[str]]] = [ # First 2 params - rule name & source. Secondary array - list of rules, # which can be terminated by this one. ("table", rules_block.table, ["paragraph", "reference"]), @@ -45,7 +51,7 @@ class ParserBlock: """ def __init__(self) -> None: - self.ruler = Ruler() + self.ruler = Ruler[RuleFuncBlockType]() for name, rule, alt in _rules: self.ruler.push(name, rule, {"alt": alt}) diff --git a/markdown_it/parser_core.py b/markdown_it/parser_core.py index b3eb8abe..ca5ab256 100644 --- a/markdown_it/parser_core.py +++ b/markdown_it/parser_core.py @@ -6,7 +6,9 @@ """ from __future__ import annotations -from .ruler import RuleFunc, Ruler +from typing import Callable + +from .ruler import Ruler from .rules_core import ( block, inline, @@ -18,7 +20,9 @@ ) from .rules_core.state_core import StateCore -_rules: list[tuple[str, RuleFunc]] = [ +RuleFuncCoreType = Callable[[StateCore], None] + +_rules: list[tuple[str, RuleFuncCoreType]] = [ ("normalize", normalize), ("block", block), ("inline", inline), @@ -31,7 +35,7 @@ class ParserCore: def __init__(self) -> None: - self.ruler = Ruler() + self.ruler = Ruler[RuleFuncCoreType]() for name, rule in _rules: self.ruler.push(name, rule) diff --git a/markdown_it/parser_inline.py b/markdown_it/parser_inline.py index febe4e6e..0026c383 100644 --- a/markdown_it/parser_inline.py +++ b/markdown_it/parser_inline.py @@ -2,10 +2,10 @@ """ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Callable from . import rules_inline -from .ruler import RuleFunc, Ruler +from .ruler import Ruler from .rules_inline.state_inline import StateInline from .token import Token from .utils import EnvType @@ -13,8 +13,14 @@ if TYPE_CHECKING: from markdown_it import MarkdownIt + # Parser rules -_rules: list[tuple[str, RuleFunc]] = [ +RuleFuncInlineType = Callable[[StateInline, bool], bool] +"""(state: StateInline, silent: bool) -> matched: bool) + +`silent` disables token generation, useful for lookahead. +""" +_rules: list[tuple[str, RuleFuncInlineType]] = [ ("text", rules_inline.text), ("linkify", rules_inline.linkify), ("newline", rules_inline.newline), @@ -34,7 +40,8 @@ # # Don't use this for anything except pairs (plugins working with `balance_pairs`). # -_rules2: list[tuple[str, RuleFunc]] = [ +RuleFuncInline2Type = Callable[[StateInline], None] +_rules2: list[tuple[str, RuleFuncInline2Type]] = [ ("balance_pairs", rules_inline.link_pairs), ("strikethrough", rules_inline.strikethrough.postProcess), ("emphasis", rules_inline.emphasis.postProcess), @@ -46,11 +53,11 @@ class ParserInline: def __init__(self) -> None: - self.ruler = Ruler() + self.ruler = Ruler[RuleFuncInlineType]() for name, rule in _rules: self.ruler.push(name, rule) # Second ruler used for post-processing (e.g. 
in emphasis-like rules) - self.ruler2 = Ruler() + self.ruler2 = Ruler[RuleFuncInline2Type]() for name, rule2 in _rules2: self.ruler2.push(name, rule2) diff --git a/markdown_it/ruler.py b/markdown_it/ruler.py index 9849561d..bd8baba3 100644 --- a/markdown_it/ruler.py +++ b/markdown_it/ruler.py @@ -17,9 +17,9 @@ class Ruler """ from __future__ import annotations -from collections.abc import Callable, Iterable +from collections.abc import Iterable from dataclasses import dataclass, field -from typing import TYPE_CHECKING, TypedDict +from typing import TYPE_CHECKING, Generic, TypedDict, TypeVar import warnings from markdown_it._compat import DATACLASS_KWARGS @@ -57,33 +57,30 @@ def srcCharCode(self) -> tuple[int, ...]: return self._srcCharCode -# The first positional arg is always a subtype of `StateBase`. Other -# arguments may or may not exist, based on the rule's type (block, -# core, inline). Return type is either `None` or `bool` based on the -# rule's type. -RuleFunc = Callable # type: ignore - - class RuleOptionsType(TypedDict, total=False): alt: list[str] +RuleFuncTv = TypeVar("RuleFuncTv") +"""A rule function, whose signature is dependent on the state type.""" + + @dataclass(**DATACLASS_KWARGS) -class Rule: +class Rule(Generic[RuleFuncTv]): name: str enabled: bool - fn: RuleFunc = field(repr=False) + fn: RuleFuncTv = field(repr=False) alt: list[str] -class Ruler: +class Ruler(Generic[RuleFuncTv]): def __init__(self) -> None: # List of added rules. - self.__rules__: list[Rule] = [] + self.__rules__: list[Rule[RuleFuncTv]] = [] # Cached rule chains. # First level - chain name, '' for default. # Second level - diginal anchor for fast filtering by charcodes. - self.__cache__: dict[str, list[RuleFunc]] | None = None + self.__cache__: dict[str, list[RuleFuncTv]] | None = None def __find__(self, name: str) -> int: """Find rule index by name""" @@ -112,7 +109,7 @@ def __compile__(self) -> None: self.__cache__[chain].append(rule.fn) def at( - self, ruleName: str, fn: RuleFunc, options: RuleOptionsType | None = None + self, ruleName: str, fn: RuleFuncTv, options: RuleOptionsType | None = None ) -> None: """Replace rule by name with new function & options. @@ -133,7 +130,7 @@ def before( self, beforeName: str, ruleName: str, - fn: RuleFunc, + fn: RuleFuncTv, options: RuleOptionsType | None = None, ) -> None: """Add new rule to chain before one with given name. @@ -148,14 +145,16 @@ def before( options = options or {} if index == -1: raise KeyError(f"Parser rule not found: {beforeName}") - self.__rules__.insert(index, Rule(ruleName, True, fn, options.get("alt", []))) + self.__rules__.insert( + index, Rule[RuleFuncTv](ruleName, True, fn, options.get("alt", [])) + ) self.__cache__ = None def after( self, afterName: str, ruleName: str, - fn: RuleFunc, + fn: RuleFuncTv, options: RuleOptionsType | None = None, ) -> None: """Add new rule to chain after one with given name. @@ -171,12 +170,12 @@ def after( if index == -1: raise KeyError(f"Parser rule not found: {afterName}") self.__rules__.insert( - index + 1, Rule(ruleName, True, fn, options.get("alt", [])) + index + 1, Rule[RuleFuncTv](ruleName, True, fn, options.get("alt", [])) ) self.__cache__ = None def push( - self, ruleName: str, fn: RuleFunc, options: RuleOptionsType | None = None + self, ruleName: str, fn: RuleFuncTv, options: RuleOptionsType | None = None ) -> None: """Push new rule to the end of chain. @@ -185,7 +184,9 @@ def push( :param options: new rule options (not mandatory). 
""" - self.__rules__.append(Rule(ruleName, True, fn, (options or {}).get("alt", []))) + self.__rules__.append( + Rule[RuleFuncTv](ruleName, True, fn, (options or {}).get("alt", [])) + ) self.__cache__ = None def enable( @@ -252,7 +253,7 @@ def disable( self.__cache__ = None return result - def getRules(self, chainName: str) -> list[RuleFunc]: + def getRules(self, chainName: str = "") -> list[RuleFuncTv]: """Return array of active functions (rules) for given chain name. It analyzes rules configuration, compiles caches if not exists and returns result. diff --git a/markdown_it/rules_block/lheading.py b/markdown_it/rules_block/lheading.py index fbd50699..3522207a 100644 --- a/markdown_it/rules_block/lheading.py +++ b/markdown_it/rules_block/lheading.py @@ -1,7 +1,6 @@ # lheading (---, ==) import logging -from ..ruler import Ruler from .state_block import StateBlock LOGGER = logging.getLogger(__name__) @@ -12,7 +11,7 @@ def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> b level = None nextLine = startLine + 1 - ruler: Ruler = state.md.block.ruler + ruler = state.md.block.ruler terminatorRules = ruler.getRules("paragraph") if state.is_code_block(startLine): diff --git a/markdown_it/rules_block/paragraph.py b/markdown_it/rules_block/paragraph.py index 3c7d43d3..5388a4b1 100644 --- a/markdown_it/rules_block/paragraph.py +++ b/markdown_it/rules_block/paragraph.py @@ -1,7 +1,6 @@ """Paragraph.""" import logging -from ..ruler import Ruler from .state_block import StateBlock LOGGER = logging.getLogger(__name__) @@ -13,7 +12,7 @@ def paragraph(state: StateBlock, startLine: int, endLine: int, silent: bool) -> ) nextLine = startLine + 1 - ruler: Ruler = state.md.block.ruler + ruler = state.md.block.ruler terminatorRules = ruler.getRules("paragraph") endLine = state.lineMax diff --git a/tests/test_api/test_plugin_creation.py b/tests/test_api/test_plugin_creation.py index 3a9af8bb..d5bda748 100644 --- a/tests/test_api/test_plugin_creation.py +++ b/tests/test_api/test_plugin_creation.py @@ -6,10 +6,11 @@ def inline_rule(state, silent): print("plugin called") + return False def test_inline_after(capsys): - def _plugin(_md): + def _plugin(_md: MarkdownIt) -> None: _md.inline.ruler.after("text", "new_rule", inline_rule) MarkdownIt().use(_plugin).parse("[") @@ -17,7 +18,7 @@ def _plugin(_md): def test_inline_before(capsys): - def _plugin(_md): + def _plugin(_md: MarkdownIt) -> None: _md.inline.ruler.before("text", "new_rule", inline_rule) MarkdownIt().use(_plugin).parse("a") @@ -25,7 +26,7 @@ def _plugin(_md): def test_inline_at(capsys): - def _plugin(_md): + def _plugin(_md: MarkdownIt) -> None: _md.inline.ruler.at("text", inline_rule) MarkdownIt().use(_plugin).parse("a") @@ -34,10 +35,11 @@ def _plugin(_md): def block_rule(state, startLine, endLine, silent): print("plugin called") + return False def test_block_after(capsys): - def _plugin(_md): + def _plugin(_md: MarkdownIt) -> None: _md.block.ruler.after("hr", "new_rule", block_rule) MarkdownIt().use(_plugin).parse("a") @@ -45,7 +47,7 @@ def _plugin(_md): def test_block_before(capsys): - def _plugin(_md): + def _plugin(_md: MarkdownIt) -> None: _md.block.ruler.before("hr", "new_rule", block_rule) MarkdownIt().use(_plugin).parse("a") @@ -53,7 +55,7 @@ def _plugin(_md): def test_block_at(capsys): - def _plugin(_md): + def _plugin(_md: MarkdownIt) -> None: _md.block.ruler.at("hr", block_rule) MarkdownIt().use(_plugin).parse("a") @@ -65,7 +67,7 @@ def core_rule(state): def test_core_after(capsys): - def _plugin(_md): + def 
_plugin(_md: MarkdownIt) -> None: _md.core.ruler.after("normalize", "new_rule", core_rule) MarkdownIt().use(_plugin).parse("a") @@ -73,7 +75,7 @@ def _plugin(_md): def test_core_before(capsys): - def _plugin(_md): + def _plugin(_md: MarkdownIt) -> None: _md.core.ruler.before("normalize", "new_rule", core_rule) MarkdownIt().use(_plugin).parse("a") @@ -81,7 +83,7 @@ def _plugin(_md): def test_core_at(capsys): - def _plugin(_md): + def _plugin(_md: MarkdownIt) -> None: _md.core.ruler.at("normalize", core_rule) MarkdownIt().use(_plugin).parse("a") From 4c3a34de7544ea8ef2cc0e752bf7d246b9a71d22 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 2 Jun 2023 23:45:14 +0200 Subject: [PATCH 56/97] =?UTF-8?q?=E2=80=BC=EF=B8=8F=20Remove=20unused=20`s?= =?UTF-8?q?ilent`=20arg=20in=20`ParserBlock.tokenize`=20(#284)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .readthedocs.yml | 1 - markdown_it/parser_block.py | 4 +--- pyproject.toml | 2 +- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/.readthedocs.yml b/.readthedocs.yml index 1d750008..611695db 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -7,7 +7,6 @@ python: path: . extra_requirements: - linkify - - plugins - rtd sphinx: diff --git a/markdown_it/parser_block.py b/markdown_it/parser_block.py index 32749127..72360f9b 100644 --- a/markdown_it/parser_block.py +++ b/markdown_it/parser_block.py @@ -55,9 +55,7 @@ def __init__(self) -> None: for name, rule, alt in _rules: self.ruler.push(name, rule, {"alt": alt}) - def tokenize( - self, state: StateBlock, startLine: int, endLine: int, silent: bool = False - ) -> None: + def tokenize(self, state: StateBlock, startLine: int, endLine: int) -> None: """Generate tokens for input range.""" rules = self.ruler.getRules("") line = startLine diff --git a/pyproject.toml b/pyproject.toml index b0d64fb1..ea7cd036 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,7 +45,7 @@ compare = [ linkify = ["linkify-it-py>=1,<3"] plugins = ["mdit-py-plugins"] rtd = [ - "attrs", + "mdit-py-plugins @ git+https://github.com/executablebooks/mdit-py-plugins@master", "myst-parser", "pyyaml", "sphinx", From e146728ce5b1efe52c7a75c67abc2520c07f00bb Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Sat, 3 Jun 2023 00:13:58 +0200 Subject: [PATCH 57/97] =?UTF-8?q?=F0=9F=93=9A=20Update=20docs=20(#285)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .github/ISSUE_TEMPLATE/bug_report.yml | 63 ++++++++++++++++++++++++++ .github/ISSUE_TEMPLATE/config.yml | 4 ++ .github/ISSUE_TEMPLATE/enhancement.yml | 44 ++++++++++++++++++ 3 files changed, 111 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.yml create mode 100644 .github/ISSUE_TEMPLATE/config.yml create mode 100644 .github/ISSUE_TEMPLATE/enhancement.yml diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 00000000..df7e2306 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,63 @@ +name: Report a problem 🐛 +description: Problem reports are for when something behaves incorrectly, or differently from how you'd expect. +labels: [bug] +body: +- type: textarea + id: describe + attributes: + label: Describe the bug + description: | + Provide a short description (one or two sentences) about the problem. What did you expect to happen, and what is actually happening? 
+ + If possible, provide screenshots or error messages that you've encountered. + value: | + **context** + When I do ___. + + **expectation** + I expected ___ to occur. + + **bug** + But instead ___ happens + Here's an error message I ran into... + + **problem** + This is a problem for people doing ___ because ___. + + validations: + required: true + +- type: textarea + id: reproduce + attributes: + label: Reproduce the bug + description: | + Provide information that others may use to re-produce this behavior. + For example: + + - Step-by-step instructions that others can follow. + - Links to a website that demonstrates the bug. + - Information about certain conditions that the bug pops up. + + placeholder: | + 1. Go to '...' + 2. Click on '....' + 3. Scroll down to '....' + 4. See error + validations: + required: true + +- type: textarea + id: environment + attributes: + label: List your environment + description: | + List the environment needed to reproduce the error. + Here are a few ideas: + + - Version of markdown-it-py + - Versions of mdit-py-plugins + - The version of Python you're using. + - Your operating system + validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 00000000..46e5f734 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,4 @@ +contact_links: + - name: Questions or general discussion ❓🗣️ + url: https://github.com/executablebooks/markdown-it-py/discussions + about: Use Disussions for general conversations that aren't meant for actionable Issues. diff --git a/.github/ISSUE_TEMPLATE/enhancement.yml b/.github/ISSUE_TEMPLATE/enhancement.yml new file mode 100644 index 00000000..68eac5e6 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/enhancement.yml @@ -0,0 +1,44 @@ +name: Request an enhancement 💡 +description: Suggest an idea for this project +labels: [enhancement] +body: + +- type: textarea + id: context + attributes: + label: Context + description: | + - Provide background to help others understand this issue. + - Describe the problem or need you'd like to address. + validations: + required: true + + +- type: textarea + id: proposal + attributes: + label: Proposal + description: | + - A simple and clear description of what you're proposing. + - Ideas or constraints for how to implement this proposal + - Important considerations to think about or discuss + validations: + required: false + + +- type: textarea + id: tasks + attributes: + label: Tasks and updates + description: | + Use this area to track ongoing work and to-do items. + The more specific the better. + + _If you can't think of anything then just leave this blank and we can fill it in later! This can be filled in as we understand more about an issue._ + + placeholder: | + - [ ] Discuss and decide on what to do... + - [ ] Implement partial feature A... 
+ + validations: + required: false From bee6d1953be75717a3f2f6a917da6f464bed421d Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Sat, 3 Jun 2023 08:31:15 +0200 Subject: [PATCH 58/97] =?UTF-8?q?=F0=9F=9A=80=20RELEASE:=20v3.0.0=20(#286)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 78 +++++++++++++++++++++++++++++++++++++++++ README.md | 1 + docs/index.md | 1 + markdown_it/__init__.py | 2 +- 4 files changed, 81 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fa8dc6b7..1d02f42d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,83 @@ # Change Log +## 3.0.0 - 2023-06-03 + +⚠️ This release contains some minor breaking changes in the internal API and improvements to the parsing strictness. + +**Full Changelog**: + +### ⬆️ UPGRADE: Drop support for Python 3.7 + +Also add testing for Python 3.11 + +### ⬆️ UPGRADE: Update from upstream markdown-it `12.2.0` to `13.0.0` + +A key change is the addition of a new `Token` type, `text_special`, which is used to represent HTML entities and backslash escaped characters. +This ensures that (core) typographic transformation rules are not incorrectly applied to these texts. +The final core rule is now the new `text_join` rule, which joins adjacent `text`/`text_special` tokens, +and so no `text_special` tokens should be present in the final token stream. +Any custom typographic rules should be inserted before `text_join`. + +A new `linkify` rule has also been added to the inline chain, which will linkify full URLs (e.g. `https://example.com`), +and fixes collision of emphasis and linkifier (so `http://example.org/foo._bar_-_baz` is now a single link, not emphasized). +Emails and fuzzy links are not affected by this. + +* ♻️ Refactor backslash escape logic, add `text_special` [#276](https://github.com/executablebooks/markdown-it-py/pull/276) +* ♻️ Parse entities to `text_special` token [#280](https://github.com/executablebooks/markdown-it-py/pull/280) +* ♻️ Refactor: Add linkifier rule to inline chain for full links [#279](https://github.com/executablebooks/markdown-it-py/pull/279) +* ‼️ Remove `(p)` => `§` replacement in typographer [#281](https://github.com/executablebooks/markdown-it-py/pull/281) +* ‼️ Remove unused `silent` arg in `ParserBlock.tokenize` [#284](https://github.com/executablebooks/markdown-it-py/pull/284) +* 🐛 FIX: numeric character reference passing [#272](https://github.com/executablebooks/markdown-it-py/pull/272) +* 🐛 Fix: tab preventing paragraph continuation in lists [#274](https://github.com/executablebooks/markdown-it-py/pull/274) +* 👌 Improve nested emphasis parsing [#273](https://github.com/executablebooks/markdown-it-py/pull/273) +* 👌 fix possible ReDOS in newline rule [#275](https://github.com/executablebooks/markdown-it-py/pull/275) +* 👌 Improve performance of `skipSpaces`/`skipChars` [#271](https://github.com/executablebooks/markdown-it-py/pull/271) +* 👌 Show text of `text_special` in `tree.pretty` [#282](https://github.com/executablebooks/markdown-it-py/pull/282) + +### ♻️ REFACTOR: Replace most character code use with strings + +The use of `StateBase.srcCharCode` is deprecated (with backward-compatibility), and all core uses are replaced by `StateBase.src`. + +Conversion of source string characters to an integer representing the Unicode character is prevalent in the upstream JavaScript implementation, to improve performance. 
+However, it is unnecessary in Python and leads to harder to read code and performance deprecations (during the conversion in the `StateBase` initialisation). + +See [#270](https://github.com/executablebooks/markdown-it-py/pull/270), thanks to [@hukkinj1](https://github.com/hukkinj1). + +### ♻️ Centralise indented code block tests + +For CommonMark, the presence of indented code blocks prevent any other block element from having an indent of greater than 4 spaces. +Certain Markdown flavors and derivatives, such as mdx and djot, disable these code blocks though, since it is more common to use code fences and/or arbitrary indenting is desirable. +Previously, disabling code blocks did not remove the indent limitation, since most block elements had the 3 space limitation hard-coded. +This change centralised the logic of applying this limitation (in `StateBlock.is_code_block`), and only applies it when indented code blocks are enabled. + +This allows for e.g. + +```md +
+<div><div><div><div>
+
+    I can indent as much as I want here.
+
+</div></div></div></div>
    +``` + +See [#260](https://github.com/executablebooks/markdown-it-py/pull/260) + +### 🔧 Maintenance changes + +Strict type annotation checking has been applied to the whole code base, +[ruff](https://github.com/charliermarsh/ruff) is now used for linting, +and fuzzing tests have been added to the CI, to integrate with Google [OSS-Fuzz](https://github.com/google/oss-fuzz/tree/master/projects/markdown-it-py) testing, thanks to [@DavidKorczynski](https://github.com/DavidKorczynski). + +* 🔧 MAINTAIN: Make type checking strict [#](https://github.com/executablebooks/markdown-it-py/pull/267) +* 🔧 Add typing of rule functions [#283](https://github.com/executablebooks/markdown-it-py/pull/283) +* 🔧 Move linting from flake8 to ruff [#268](https://github.com/executablebooks/markdown-it-py/pull/268) +* 🧪 CI: Add fuzzing workflow for PRs [#262](https://github.com/executablebooks/markdown-it-py/pull/262) +* 🔧 Add tox env for fuzz testcase run [#263](https://github.com/executablebooks/markdown-it-py/pull/263) +* 🧪 Add OSS-Fuzz set up by @DavidKorczynski in [#255](https://github.com/executablebooks/markdown-it-py/pull/255) +* 🧪 Fix fuzzing test failures [#254](https://github.com/executablebooks/markdown-it-py/pull/254) + ## 2.2.0 - 2023-02-22 ### What's Changed diff --git a/README.md b/README.md index 789588fe..2ff747ef 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,7 @@ - Pluggable: Adds syntax extensions to extend the parser (see the [plugin list][md-plugins]). - High speed (see our [benchmarking tests][md-performance]) - [Safe by default][md-security] +- Member of [Google's Assured Open Source Software](https://cloud.google.com/assured-open-source-software/docs/supported-packages) This is a Python port of [markdown-it], and some of its associated plugins. For more details see: . diff --git a/docs/index.md b/docs/index.md index 64fd344d..bff3ac31 100644 --- a/docs/index.md +++ b/docs/index.md @@ -7,6 +7,7 @@ - {fa}`check,text-success mr-1` Pluggable: Adds syntax extensions to extend the parser (see the [plugin list](md/plugins)) - {fa}`check,text-success mr-1` High speed (see our [benchmarking tests](md/performance)) - {fa}`check,text-success mr-1` [Safe by default](md/security) +- {fa}`check,text-success mr-1` Member of [Google's Assured Open Source Software](https://cloud.google.com/assured-open-source-software/docs/supported-packages) For a good introduction to [markdown-it] see the __[Live demo](https://markdown-it.github.io)__. This is a Python port of the well used [markdown-it], and some of its associated plugins. diff --git a/markdown_it/__init__.py b/markdown_it/__init__.py index 882a0c3e..6606868a 100644 --- a/markdown_it/__init__.py +++ b/markdown_it/__init__.py @@ -1,5 +1,5 @@ """A Python port of Markdown-It""" __all__ = ("MarkdownIt",) -__version__ = "2.2.0" +__version__ = "3.0.0" from .main import MarkdownIt From dcf40fc104b5dc06f621216874d14c4a64d9f5fc Mon Sep 17 00:00:00 2001 From: tsutsu3 Date: Wed, 14 Jun 2023 22:21:54 +0900 Subject: [PATCH 59/97] =?UTF-8?q?=F0=9F=93=9A=20Fix=20CI=20badge=20link=20?= =?UTF-8?q?(#291)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2ff747ef..5573b622 100644 --- a/README.md +++ b/README.md @@ -127,7 +127,7 @@ Big thanks to the authors of [markdown-it]: Also [John MacFarlane](https://github.com/jgm) for his work on the CommonMark spec and reference implementations. 
-[github-ci]: https://github.com/executablebooks/markdown-it-py/workflows/Python%20package/badge.svg?branch=master +[github-ci]: https://github.com/executablebooks/markdown-it-py/actions/workflows/tests.yml/badge.svg?branch=master [github-link]: https://github.com/executablebooks/markdown-it-py [pypi-badge]: https://img.shields.io/pypi/v/markdown-it-py.svg [pypi-link]: https://pypi.org/project/markdown-it-py From a095740cd754dccc572b77252790f23aa84c8430 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Tue, 27 Jun 2023 20:45:36 +0200 Subject: [PATCH 60/97] =?UTF-8?q?=F0=9F=93=9A=20Update=20docs=20(#292)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 7 +++++-- benchmarking/bench_packages.py | 9 +++++++++ docs/_static/markdown-it-py.svg | 24 ++++++++++++++++++++++++ docs/architecture.md | 2 +- docs/conf.py | 3 +++ docs/contributing.md | 2 +- docs/index.md | 3 ++- docs/performance.md | 28 ++++++++++++++++++++++++++++ docs/{other.md => security.md} | 31 ------------------------------- pyproject.toml | 5 +++-- 10 files changed, 76 insertions(+), 38 deletions(-) create mode 100644 docs/_static/markdown-it-py.svg create mode 100644 docs/performance.md rename docs/{other.md => security.md} (51%) diff --git a/README.md b/README.md index 5573b622..ddb6fb59 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,9 @@ For details on [markdown-it] itself, see: - The __[Live demo](https://markdown-it.github.io)__ - [The markdown-it README][markdown-it-readme] +**See also:** [markdown-it-pyrs](https://github.com/chrisjsewell/markdown-it-pyrs) for an experimental Rust binding, +for even more speed! + ## Installation ```bash @@ -143,6 +146,6 @@ Also [John MacFarlane](https://github.com/jgm) for his work on the CommonMark sp [CommonMark spec]: http://spec.commonmark.org/ [markdown-it]: https://github.com/markdown-it/markdown-it [markdown-it-readme]: https://github.com/markdown-it/markdown-it/blob/master/README.md -[md-security]: https://markdown-it-py.readthedocs.io/en/latest/other.html -[md-performance]: https://markdown-it-py.readthedocs.io/en/latest/other.html +[md-security]: https://markdown-it-py.readthedocs.io/en/latest/security.html +[md-performance]: https://markdown-it-py.readthedocs.io/en/latest/performance.html [md-plugins]: https://markdown-it-py.readthedocs.io/en/latest/plugins.html diff --git a/benchmarking/bench_packages.py b/benchmarking/bench_packages.py index 1158750e..084557a9 100644 --- a/benchmarking/bench_packages.py +++ b/benchmarking/bench_packages.py @@ -18,6 +18,15 @@ def test_markdown_it_py(benchmark, spec_text): benchmark(parser.render, spec_text) +@pytest.mark.benchmark(group="packages") +def test_markdown_it_pyrs(benchmark, spec_text): + import markdown_it_pyrs + + parser = markdown_it_pyrs.MarkdownIt("commonmark") + benchmark.extra_info["version"] = markdown_it_pyrs.__version__ + benchmark(parser.render, spec_text) + + @pytest.mark.benchmark(group="packages") def test_mistune(benchmark, spec_text): import mistune diff --git a/docs/_static/markdown-it-py.svg b/docs/_static/markdown-it-py.svg new file mode 100644 index 00000000..9ac486af --- /dev/null +++ b/docs/_static/markdown-it-py.svg @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/architecture.md b/docs/architecture.md index bebcf9dc..5190bbd2 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -1,6 +1,6 @@ (md/architecture)= -# markdown-it design principles +# Design principles (md/data-flow)= ## Data flow diff --git 
a/docs/conf.py b/docs/conf.py index 4fa12262..ed38bb50 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -64,8 +64,11 @@ # a list of builtin themes. # html_title = "markdown-it-py" +html_logo = html_favicon = "_static/markdown-it-py.svg" html_theme = "sphinx_book_theme" html_theme_options = { + "home_page_in_toc": True, + "use_repository_button": True, "use_edit_page_button": True, "repository_url": "https://github.com/executablebooks/markdown-it-py", "repository_branch": "master", diff --git a/docs/contributing.md b/docs/contributing.md index 6c43e0e0..d553c451 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -1,4 +1,4 @@ -# Contribute to markdown-it-py +# Contribute We welcome all contributions! ✨ diff --git a/docs/index.md b/docs/index.md index bff3ac31..96827f74 100644 --- a/docs/index.md +++ b/docs/index.md @@ -33,7 +33,8 @@ Also [John MacFarlane](https://github.com/jgm) for his work on the CommonMark sp using architecture -other +security +performance plugins contributing api/markdown_it diff --git a/docs/performance.md b/docs/performance.md new file mode 100644 index 00000000..d75c2159 --- /dev/null +++ b/docs/performance.md @@ -0,0 +1,28 @@ +(md/performance)= + +# Performance + +You can view our continuous integration benchmarking analysis at: , +or you can run it for yourself within the repository: + +```bash +tox -e py311-bench-packages -- --benchmark-columns mean,stddev +``` + +| package | version | mean (ms) | stddev | +| -------------------- | ------- | --------- | ------- | +| markdown-it-pyrs[^1] | 0.2.1 | 6.410 | 0.426 | +| mistune[^2] | 3.0.1 | 80.409 | 2.335 | +| **markdown-it-py** | 3.0.0 | 97.242 | 4.427 | +| mistletoe | 1.1.0 | 99.633 | 4.628 | +| commonmark-py | 0.9.1 | 300.403 | 9.706 | +| pymarkdown | 3.4.3 | 387.775 | 10.394 | +| pymarkdown (+extras) | 3.4.3 | 646.564 | 11.316 | +| panflute | 2.3.0 | 860.105 | 208.607 | + +As you can see, `markdown-it-py` doesn't pay with speed for it's flexibility. + +[^1]: `markdown-it-pyrs` is a Rust implementation of `markdown-it-py`'s parser, in beta development, check it out at: +[^2]: `mistune` is not CommonMark compliant, which is what allows for its +faster parsing, at the expense of issues, for example, with nested inline parsing. +See [mistletoes's explanation](https://github.com/miyuchina/mistletoe/blob/master/performance.md) for further details. diff --git a/docs/other.md b/docs/security.md similarity index 51% rename from docs/other.md rename to docs/security.md index cfc5ba8c..3770d35a 100644 --- a/docs/other.md +++ b/docs/security.md @@ -33,34 +33,3 @@ If those depend on user input - always add prefixes to avoid DOM clobbering. See [discussion](https://github.com/markdown-it/markdown-it/issues/28) for details. So, if you decide to use plugins that add extended class syntax or autogenerating header anchors - be careful. 
- -(md/performance)= - -# Performance - -You can view our continuous integration benchmarking analysis at: , -or you can run it for yourself within the repository: - -```console -$ tox -e py38-bench-packages -- --benchmark-columns mean,stddev - -Name (time in ms) Mean StdDev ---------------------------------------------------------------- -test_mistune 70.3272 (1.0) 0.7978 (1.0) -test_mistletoe 116.0919 (1.65) 6.2870 (7.88) -test_markdown_it_py 152.9022 (2.17) 4.2988 (5.39) -test_commonmark_py 326.9506 (4.65) 15.8084 (19.81) -test_pymarkdown 368.2712 (5.24) 7.5906 (9.51) -test_pymarkdown_extra 640.4913 (9.11) 15.1769 (19.02) -test_panflute 678.3547 (9.65) 9.4622 (11.86) ---------------------------------------------------------------- -``` - -As you can see, `markdown-it-py` doesn't pay with speed for it's flexibility. - -```{note} -`mistune` is not CommonMark compliant, which is what allows for its -faster parsing, at the expense of issues, for example, with nested inline parsing. -See [mistletoes's explanation](https://github.com/miyuchina/mistletoe/blob/master/performance.md) -for further details. -``` diff --git a/pyproject.toml b/pyproject.toml index ea7cd036..46cbc762 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,13 +39,14 @@ compare = [ "commonmark~=0.9", "markdown~=3.4", "mistletoe~=1.0", - "mistune~=2.0", + "mistune~=3.0", "panflute~=2.3", + "markdown-it-pyrs", ] linkify = ["linkify-it-py>=1,<3"] plugins = ["mdit-py-plugins"] rtd = [ - "mdit-py-plugins @ git+https://github.com/executablebooks/mdit-py-plugins@master", + "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", From e5a0ff8e18dd9310d8170d495e7c662698fbd82b Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Tue, 27 Jun 2023 20:54:25 +0200 Subject: [PATCH 61/97] =?UTF-8?q?=F0=9F=93=9A=20Add=20icon=20to=20readme?= =?UTF-8?q?=20(#293)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index ddb6fb59..9bebca33 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,10 @@ [![Code style: black][black-badge]][black-link] [![PyPI - Downloads][install-badge]][install-link] +

+<div align="center">
+    <img src="docs/_static/markdown-it-py.svg" alt="markdown-it-py icon">
+</div>
    + > Markdown parser done right. - Follows the __[CommonMark spec](http://spec.commonmark.org/)__ for baseline parsing From c2071270b86737c156244a39097867596459db9e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 27 Jun 2023 20:56:53 +0200 Subject: [PATCH 62/97] [pre-commit.ci] pre-commit autoupdate (#290) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Chris Sewell --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2aecdc6d..8b6bda1a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -34,12 +34,12 @@ repos: - id: black - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.270 + rev: v0.0.275 hooks: - id: ruff - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.3.0 + rev: v1.4.1 hooks: - id: mypy additional_dependencies: [mdurl] From 14cca384e608c49d205866347d66e35cb2b66ff9 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Tue, 27 Jun 2023 22:22:31 +0200 Subject: [PATCH 63/97] =?UTF-8?q?=F0=9F=93=9A=20Update=20icon=20(#294)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/_static/markdown-it-py.svg | 34 +++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/docs/_static/markdown-it-py.svg b/docs/_static/markdown-it-py.svg index 9ac486af..a8e15cf3 100644 --- a/docs/_static/markdown-it-py.svg +++ b/docs/_static/markdown-it-py.svg @@ -1,24 +1,34 @@ - - - - - - + + + + + + + - + - + + + + + + + + + + + + + - + - - - From 3613e8016ecafe21709471ee0032a90a4157c2d1 Mon Sep 17 00:00:00 2001 From: DJ Ramones <50655786+djramones@users.noreply.github.com> Date: Wed, 5 Jul 2023 22:51:52 +0800 Subject: [PATCH 64/97] =?UTF-8?q?=F0=9F=93=9A=20DOCS:=20Clarify=20docs=20r?= =?UTF-8?q?egarding=20security=20configuration=20(#296)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Chris Sewell --- README.md | 2 +- docs/index.md | 2 +- docs/security.md | 39 ++++++++++++++++++++++++--------------- 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 9bebca33..43a5da23 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ - Configurable syntax: you can add new rules and even replace existing ones. - Pluggable: Adds syntax extensions to extend the parser (see the [plugin list][md-plugins]). - High speed (see our [benchmarking tests][md-performance]) -- [Safe by default][md-security] +- Easy to configure for [security][md-security] - Member of [Google's Assured Open Source Software](https://cloud.google.com/assured-open-source-software/docs/supported-packages) This is a Python port of [markdown-it], and some of its associated plugins. diff --git a/docs/index.md b/docs/index.md index 96827f74..a5484518 100644 --- a/docs/index.md +++ b/docs/index.md @@ -6,7 +6,7 @@ - {fa}`check,text-success mr-1` Configurable syntax: you can add new rules and even replace existing ones. 
- {fa}`check,text-success mr-1` Pluggable: Adds syntax extensions to extend the parser (see the [plugin list](md/plugins)) - {fa}`check,text-success mr-1` High speed (see our [benchmarking tests](md/performance)) -- {fa}`check,text-success mr-1` [Safe by default](md/security) +- {fa}`check,text-success mr-1` Easy to configure for [security](md/security) - {fa}`check,text-success mr-1` Member of [Google's Assured Open Source Software](https://cloud.google.com/assured-open-source-software/docs/supported-packages) For a good introduction to [markdown-it] see the __[Live demo](https://markdown-it.github.io)__. diff --git a/docs/security.md b/docs/security.md index 3770d35a..7cbf765f 100644 --- a/docs/security.md +++ b/docs/security.md @@ -2,27 +2,36 @@ # Security -Many people don't understand that markdown format does not care much about security. -In many cases you have to pass output to sanitizers. -`markdown-it` provides 2 possible strategies to produce safe output: +By default, the `MarkdownIt` parser is initialised to comply with the [CommonMark spec](https://spec.commonmark.org/), which allows for parsing arbitrary HTML tags. +This can be useful for many use cases, for example when writing articles for one's own blog or composing technical documentation for a software package. -1. Don't enable HTML. Extend markup features with [plugins](md/plugins). - We think it's the best choice and use it by default. - - That's ok for 99% of user needs. - - Output will be safe without sanitizer. -2. Enable HTML and use external sanitizer package(s). +However, extra precautions are needed when parsing content from untrusted sources. +Generally, the output should be run through sanitizers to ensure safety and prevent vulnerabilities like cross-site scripting (XSS). +With `markdown-it`/`markdown-it-py`, there are two strategies for doing this: -Also by default `markdown-it` prohibits some kind of links, which could be used -for XSS: +1. Enable HTML (as is needed for full CommonMark compliance), and then use external sanitizer package(s). +2. Disable HTML, and then use [plugins](md/plugins) to selectively enable markup features. + This removes the need for further sanitizing. + +```{warning} +Unlike the original `markdown-it` JavaScript project, which uses the second, safe-by-default strategy, `markdown-it-py` enables the more convenient, but less secure, CommonMark-compliant settings by default. + +This is not safe when using `markdown-it-py` in web applications that parse user-submitted content. +In such cases, [using the `js-default` preset](using.md) is strongly recommended. +For example: + +```python +from markdown_it import MarkdownIt +MarkdownIt("js-default").render("*user-submitted* text") +``` + +Note that even with the default configuration, `markdown-it-py` prohibits some kind of links which could be used for XSS: - `javascript:`, `vbscript:` - `file:` -- `data:`, except some images (gif/png/jpeg/webp). - -So, by default `markdown-it` should be safe. We care about it. +- `data:` (except some images: gif/png/jpeg/webp) -If you find a security problem - contact us via . -Such reports are fixed with top priority. +If you find a security problem, please report it to . 
## Plugins From 97d32b2a869429cdd3ac01bb6334ada85589143e Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Tue, 21 Nov 2023 12:55:09 +0100 Subject: [PATCH 65/97] =?UTF-8?q?=F0=9F=93=9A=20Fix=20the=20RTD=20build=20?= =?UTF-8?q?(#311)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .readthedocs.yml | 6 +++++- pyproject.toml | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.readthedocs.yml b/.readthedocs.yml index 611695db..57297d4b 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -1,7 +1,11 @@ version: 2 +build: + os: ubuntu-22.04 + tools: + python: "3.8" + python: - version: "3.8" install: - method: pip path: . diff --git a/pyproject.toml b/pyproject.toml index 46cbc762..89e738c0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,7 @@ rtd = [ "sphinx-design", "sphinx_book_theme", "jupyter_sphinx", + "ipykernel", ] testing = [ "coverage", From 4f2e5f252b9552cbc899d477e4b1d2d682ba6d2c Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 24 Nov 2023 08:18:36 +0000 Subject: [PATCH 66/97] =?UTF-8?q?=F0=9F=94=A7=20Replace=20black=20and=20is?= =?UTF-8?q?ort=20with=20ruff=20formatter=20(#313)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 20 ++++++-------------- markdown_it/cli/parse.py | 6 +++--- markdown_it/common/utils.py | 4 ++-- markdown_it/presets/__init__.py | 2 +- markdown_it/renderer.py | 2 +- markdown_it/rules_inline/autolink.py | 2 +- pyproject.toml | 12 +++++------- 7 files changed, 19 insertions(+), 29 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8b6bda1a..ed0d43c9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,30 +16,22 @@ exclude: > repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: - id: check-json - id: check-yaml - id: end-of-file-fixer - id: trailing-whitespace - - repo: https://github.com/pycqa/isort - rev: 5.12.0 - hooks: - - id: isort - - - repo: https://github.com/psf/black - rev: 23.3.0 - hooks: - - id: black - - - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.275 + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.1.6 hooks: - id: ruff + args: [--fix] + - id: ruff-format - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.4.1 + rev: v1.7.0 hooks: - id: mypy additional_dependencies: [mdurl] diff --git a/markdown_it/cli/parse.py b/markdown_it/cli/parse.py index 890d5de3..bfd3449d 100644 --- a/markdown_it/cli/parse.py +++ b/markdown_it/cli/parse.py @@ -13,7 +13,7 @@ from markdown_it import __version__ from markdown_it.main import MarkdownIt -version_str = "markdown-it-py [version {}]".format(__version__) +version_str = f"markdown-it-py [version {__version__}]" def main(args: Sequence[str] | None = None) -> int: @@ -35,7 +35,7 @@ def convert_file(filename: str) -> None: Parse a Markdown file and dump the output to stdout. 
""" try: - with open(filename, "r", encoding="utf8", errors="ignore") as fin: + with open(filename, encoding="utf8", errors="ignore") as fin: rendered = MarkdownIt().render(fin.read()) print(rendered, end="") except OSError: @@ -100,7 +100,7 @@ def parse_args(args: Sequence[str] | None) -> argparse.Namespace: def print_heading() -> None: - print("{} (interactive)".format(version_str)) + print(f"{version_str} (interactive)") print("Type Ctrl-D to complete input, or Ctrl-C to exit.") diff --git a/markdown_it/common/utils.py b/markdown_it/common/utils.py index 0d11e3e3..dbe082a1 100644 --- a/markdown_it/common/utils.py +++ b/markdown_it/common/utils.py @@ -194,7 +194,7 @@ def isWhiteSpace(code: int) -> bool: # ////////////////////////////////////////////////////////////////////////////// UNICODE_PUNCT_RE = re.compile( - r"[!-#%-\*,-\/:;\?@\[-\]_\{\}\xA1\xA7\xAB\xB6\xB7\xBB\xBF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u09FD\u0A76\u0AF0\u0C84\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E4E\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]|\uD800[\uDD00-\uDD02\uDF9F\uDFD0]|\uD801\uDD6F|\uD802[\uDC57\uDD1F\uDD3F\uDE50-\uDE58\uDE7F\uDEF0-\uDEF6\uDF39-\uDF3F\uDF99-\uDF9C]|\uD803[\uDF55-\uDF59]|\uD804[\uDC47-\uDC4D\uDCBB\uDCBC\uDCBE-\uDCC1\uDD40-\uDD43\uDD74\uDD75\uDDC5-\uDDC8\uDDCD\uDDDB\uDDDD-\uDDDF\uDE38-\uDE3D\uDEA9]|\uD805[\uDC4B-\uDC4F\uDC5B\uDC5D\uDCC6\uDDC1-\uDDD7\uDE41-\uDE43\uDE60-\uDE6C\uDF3C-\uDF3E]|\uD806[\uDC3B\uDE3F-\uDE46\uDE9A-\uDE9C\uDE9E-\uDEA2]|\uD807[\uDC41-\uDC45\uDC70\uDC71\uDEF7\uDEF8]|\uD809[\uDC70-\uDC74]|\uD81A[\uDE6E\uDE6F\uDEF5\uDF37-\uDF3B\uDF44]|\uD81B[\uDE97-\uDE9A]|\uD82F\uDC9F|\uD836[\uDE87-\uDE8B]|\uD83A[\uDD5E\uDD5F]" # noqa: E501 + 
r"[!-#%-\*,-\/:;\?@\[-\]_\{\}\xA1\xA7\xAB\xB6\xB7\xBB\xBF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u09FD\u0A76\u0AF0\u0C84\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E4E\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]|\uD800[\uDD00-\uDD02\uDF9F\uDFD0]|\uD801\uDD6F|\uD802[\uDC57\uDD1F\uDD3F\uDE50-\uDE58\uDE7F\uDEF0-\uDEF6\uDF39-\uDF3F\uDF99-\uDF9C]|\uD803[\uDF55-\uDF59]|\uD804[\uDC47-\uDC4D\uDCBB\uDCBC\uDCBE-\uDCC1\uDD40-\uDD43\uDD74\uDD75\uDDC5-\uDDC8\uDDCD\uDDDB\uDDDD-\uDDDF\uDE38-\uDE3D\uDEA9]|\uD805[\uDC4B-\uDC4F\uDC5B\uDC5D\uDCC6\uDDC1-\uDDD7\uDE41-\uDE43\uDE60-\uDE6C\uDF3C-\uDF3E]|\uD806[\uDC3B\uDE3F-\uDE46\uDE9A-\uDE9C\uDE9E-\uDEA2]|\uD807[\uDC41-\uDC45\uDC70\uDC71\uDEF7\uDEF8]|\uD809[\uDC70-\uDC74]|\uD81A[\uDE6E\uDE6F\uDEF5\uDF37-\uDF3B\uDF44]|\uD81B[\uDE97-\uDE9A]|\uD82F\uDC9F|\uD836[\uDE87-\uDE8B]|\uD83A[\uDD5E\uDD5F]" ) @@ -251,7 +251,7 @@ def isMdAsciiPunct(ch: int) -> bool: Don't confuse with unicode punctuation !!! It lacks some chars in ascii range. - """ # noqa: E501 + """ return ch in MD_ASCII_PUNCT diff --git a/markdown_it/presets/__init__.py b/markdown_it/presets/__init__.py index 1e6796a2..c3fb9e9b 100644 --- a/markdown_it/presets/__init__.py +++ b/markdown_it/presets/__init__.py @@ -1,7 +1,7 @@ __all__ = ("commonmark", "default", "zero", "js_default", "gfm_like") -from . import commonmark, default, zero from ..utils import PresetType +from . import commonmark, default, zero js_default = default diff --git a/markdown_it/renderer.py b/markdown_it/renderer.py index 7fee9ffa..5a774d06 100644 --- a/markdown_it/renderer.py +++ b/markdown_it/renderer.py @@ -155,7 +155,7 @@ def renderToken( if token.nesting == 1 and (idx + 1 < len(tokens)): nextToken = tokens[idx + 1] - if nextToken.type == "inline" or nextToken.hidden: # noqa: SIM114 + if nextToken.type == "inline" or nextToken.hidden: # Block-level tag containing an inline tag. 
# needLf = False diff --git a/markdown_it/rules_inline/autolink.py b/markdown_it/rules_inline/autolink.py index 295d963f..6546e250 100644 --- a/markdown_it/rules_inline/autolink.py +++ b/markdown_it/rules_inline/autolink.py @@ -4,7 +4,7 @@ from .state_inline import StateInline EMAIL_RE = re.compile( - r"^([a-zA-Z0-9.!#$%&\'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)$" # noqa: E501 + r"^([a-zA-Z0-9.!#$%&\'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)$" ) AUTOLINK_RE = re.compile(r"^([a-zA-Z][a-zA-Z0-9+.\-]{1,31}):([^<>\x00-\x20]*)$") diff --git a/pyproject.toml b/pyproject.toml index 89e738c0..e1c192c0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -82,14 +82,12 @@ exclude = [ "benchmarking/" ] -[tool.isort] -profile = "black" -force_sort_within_sections = true - [tool.ruff] -line-length = 100 -extend-select = ["B0", "C4", "ICN", "ISC", "N", "RUF", "SIM"] -extend-ignore = ["ISC003", "N802", "N803", "N806", "N816", "RUF003"] +extend-select = ["B0", "C4", "I", "ICN", "ISC", "N", "RUF", "SIM", "UP"] +extend-ignore = ["ISC001", "ISC003", "N802", "N803", "N806", "N816", "RUF003"] + +[tool.ruff.lint.isort] +force-sort-within-sections = true [tool.mypy] show_error_codes = true From 98ef73d25592a71e540e41920df9a90b6bba25bc Mon Sep 17 00:00:00 2001 From: Bastian Venthur Date: Fri, 24 Nov 2023 09:21:07 +0100 Subject: [PATCH 67/97] Fixed Code Style paragraph still referring to flake8 (#309) Co-authored-by: Chris Sewell --- docs/contributing.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/contributing.md b/docs/contributing.md index d553c451..8b46e678 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -16,7 +16,7 @@ Details of the port can be found in the `markdown_it/port.yaml` and in `port.yam ## Code Style -Code style is tested using [flake8](http://flake8.pycqa.org), with the configuration set in `.flake8`, and code formatted with [black](https://github.com/ambv/black). +Code style is tested using [ruff](https://github.com/astral-sh/ruff), with the configuration set in `pyproject.toml`, and code formatted with [black](https://github.com/ambv/black). Installing with `markdown-it-py[code_style]` makes the [pre-commit](https://pre-commit.com/) package available, which will ensure this style is met before commits are submitted, by reformatting the code and testing for lint errors. 
It can be setup by: From c4ffcd731d0956f1fad407db8c6d78286111ac33 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Fri, 24 Nov 2023 10:25:09 +0200 Subject: [PATCH 68/97] =?UTF-8?q?=F0=9F=A7=AA=20Add=20CI=20testing=20for?= =?UTF-8?q?=20Python=203.12=20(#303)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Chris Sewell --- .github/workflows/benchmark.yml | 4 ++-- .github/workflows/tests.yml | 13 +++++++------ pyproject.toml | 1 + tox.ini | 6 +++--- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 68cc69bb..93b2781e 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python 3.8 uses: actions/setup-python@v4 @@ -30,7 +30,7 @@ jobs: run: tox -e py38-bench-packages -- --benchmark-min-rounds 20 --benchmark-json bench-packages.json # - name: Upload package data - # uses: actions/upload-artifact@v2 + # uses: actions/upload-artifact@v3 # with: # name: bench-packages # path: bench-packages.json diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index ef1bf557..fa170cd1 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -19,7 +19,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python 3.8 uses: actions/setup-python@v4 with: @@ -31,14 +31,15 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['pypy-3.8', '3.8', '3.9', '3.10', '3.11'] + python-version: ['pypy-3.8', '3.8', '3.9', '3.10', '3.11', '3.12'] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} + allow-prereleases: true - name: Install dependencies run: | python -m pip install --upgrade pip @@ -63,7 +64,7 @@ jobs: matrix: python-version: ['3.8'] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: @@ -83,7 +84,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python 3.8 uses: actions/setup-python@v4 @@ -113,7 +114,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout source - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python 3.8 uses: actions/setup-python@v4 with: diff --git a/pyproject.toml b/pyproject.toml index e1c192c0..7f5af2b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ classifiers = [ "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", "Topic :: Software Development :: Libraries :: Python Modules", diff --git a/tox.ini b/tox.ini index 59ea5f9e..c2915b32 100644 --- a/tox.ini +++ b/tox.ini @@ -9,7 +9,7 @@ envlist = py38 [testenv] usedevelop = true -[testenv:py{38,39,310,311}] +[testenv:py{38,39,310,311,312}] extras = linkify testing @@ -27,11 +27,11 @@ commands_pre = commands = pytest {posargs} -[testenv:py{38,39,310,311}-bench-core] +[testenv:py{38,39,310,311,312}-bench-core] extras = benchmarking commands = 
pytest benchmarking/bench_core.py {posargs} -[testenv:py{38,39,310,311}-bench-packages] +[testenv:py{38,39,310,311,312}-bench-packages] extras = benchmarking,compare commands = pytest benchmarking/bench_packages.py {posargs} From 9b74610e9ee43b1545a4331a1a3fdb5a85b654a0 Mon Sep 17 00:00:00 2001 From: Bernhard Wagner Date: Tue, 28 Nov 2023 10:32:25 +0100 Subject: [PATCH 69/97] Update index.md (#315) --- docs/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.md b/docs/index.md index a5484518..d71458c5 100644 --- a/docs/index.md +++ b/docs/index.md @@ -12,7 +12,7 @@ For a good introduction to [markdown-it] see the __[Live demo](https://markdown-it.github.io)__. This is a Python port of the well used [markdown-it], and some of its associated plugins. The driving design philosophy of the port has been to change as little of the fundamental code structure (file names, function name, etc) as possible, just sprinkling in a little Python syntactical sugar ✨. -It is very simple to write complimentary extensions for both language implementations! +It is very simple to write complementary extensions for both language implementations! ## References & Thanks From df3aadfc3d3f3320bc6247523c3474dec29f938c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 19 Dec 2023 09:55:28 +0100 Subject: [PATCH 70/97] [pre-commit.ci] pre-commit autoupdate (#314) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ed0d43c9..a55107cf 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,14 +24,14 @@ repos: - id: trailing-whitespace - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.6 + rev: v0.1.8 hooks: - id: ruff args: [--fix] - id: ruff-format - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.7.0 + rev: v1.7.1 hooks: - id: mypy additional_dependencies: [mdurl] From 15290f9e33b3bceb57be14ddcdeae8448e0f943e Mon Sep 17 00:00:00 2001 From: tsutsu3 Date: Fri, 12 Jan 2024 08:07:36 +0900 Subject: [PATCH 71/97] =?UTF-8?q?=F0=9F=90=9B=20Fix=20emphasis=20inside=20?= =?UTF-8?q?raw=20links=20bugs=20(#320)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changed from `re.match` to `re.search` to work as intended. --- markdown_it/rules_inline/linkify.py | 2 +- tests/test_port/fixtures/linkify.md | 35 +++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/markdown_it/rules_inline/linkify.py b/markdown_it/rules_inline/linkify.py index a8a18153..88224e78 100644 --- a/markdown_it/rules_inline/linkify.py +++ b/markdown_it/rules_inline/linkify.py @@ -27,7 +27,7 @@ def linkify(state: StateInline, silent: bool) -> bool: ): return False - if not (match := SCHEME_RE.match(state.pending)): + if not (match := SCHEME_RE.search(state.pending)): return False proto = match.group(1) diff --git a/tests/test_port/fixtures/linkify.md b/tests/test_port/fixtures/linkify.md index f51bb6b9..02a23b17 100644 --- a/tests/test_port/fixtures/linkify.md +++ b/tests/test_port/fixtures/linkify.md @@ -211,3 +211,38 @@ google\.com .

 <p>google.com</p>
    . + +Issue [#300](https://github.com/executablebooks/markdown-it-py/issues/300) emphasis inside raw links (underscore) at beginning of line +. +http://example.org/foo._bar_-_baz This works +. +

+<p><a href="http://example.org/foo._bar_-_baz">http://example.org/foo._bar_-_baz</a> This works</p>
    +. + +Issue [#300](https://github.com/executablebooks/markdown-it-py/issues/300) emphasis inside raw links (underscore) at end of line +. +This doesnt http://example.org/foo._bar_-_baz +. +

+<p>This doesnt <a href="http://example.org/foo._bar_-_baz">http://example.org/foo._bar_-_baz</a></p>
    +. + +Issue [#300](https://github.com/executablebooks/markdown-it-py/issues/300) emphasis inside raw links (underscore) mix1 +. +While this `does` http://example.org/foo._bar_-_baz, this doesnt http://example.org/foo._bar_-_baz and this **does** http://example.org/foo._bar_-_baz +. +

+<p>While this <code>does</code> <a href="http://example.org/foo._bar_-_baz">http://example.org/foo._bar_-_baz</a>, this doesnt <a href="http://example.org/foo._bar_-_baz">http://example.org/foo._bar_-_baz</a> and this <strong>does</strong> <a href="http://example.org/foo._bar_-_baz">http://example.org/foo._bar_-_baz</a></p>
    +. + +Issue [#300](https://github.com/executablebooks/markdown-it-py/issues/300) emphasis inside raw links (underscore) mix2 +. +This applies to _series of URLs too_ http://example.org/foo._bar_-_baz http://example.org/foo._bar_-_baz, these dont http://example.org/foo._bar_-_baz http://example.org/foo._bar_-_baz and these **do** http://example.org/foo._bar_-_baz http://example.org/foo._bar_-_baz +. +

+<p>This applies to <em>series of URLs too</em> <a href="http://example.org/foo._bar_-_baz">http://example.org/foo._bar_-_baz</a> <a href="http://example.org/foo._bar_-_baz">http://example.org/foo._bar_-_baz</a>, these dont <a href="http://example.org/foo._bar_-_baz">http://example.org/foo._bar_-_baz</a> <a href="http://example.org/foo._bar_-_baz">http://example.org/foo._bar_-_baz</a> and these <strong>do</strong> <a href="http://example.org/foo._bar_-_baz">http://example.org/foo._bar_-_baz</a> <a href="http://example.org/foo._bar_-_baz">http://example.org/foo._bar_-_baz</a></p>
    +. + +emphasis inside raw links (asterisk) at end of line +. +This doesnt http://example.org/foo.*bar*-*baz +. +

+<p>This doesnt <a href="http://example.org/foo.*bar*-*baz">http://example.org/foo.*bar*-*baz</a></p>
+.
\ No newline at end of file
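
The one-line change above (`SCHEME_RE.match` → `SCHEME_RE.search`) matters because the scheme has to be found at the end of `state.pending`, the text already parsed before the link body: `re.match` only tries position 0, so a URL preceded by any other text was never recognised — the issue #300 cases added to the fixtures above. A minimal sketch of the difference, using an illustrative end-anchored pattern (an assumption here, not the exact `SCHEME_RE` from the source):

```python
import re

# Illustrative only: an end-anchored scheme pattern in the spirit of the
# linkify rule's SCHEME_RE (NOT the exact pattern from the source).
SCHEME_RE = re.compile(r"(?:^|[^a-z0-9.+-])([a-z][a-z0-9.+-]*)$", re.IGNORECASE)

# A plain string standing in for state.pending (text scanned before the link).
pending = "This doesnt http"

# re.match only tries position 0, so a URL preceded by other words is missed.
print(SCHEME_RE.match(pending))            # None
# re.search scans the whole string and finds the scheme at the end.
print(SCHEME_RE.search(pending).group(1))  # 'http'
```

With `search`, the scheme is still captured as `match.group(1)` (the `proto` used by the rest of the rule), so the remainder of the rule is unchanged.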
[PATCH 76/97] [pre-commit.ci] pre-commit autoupdate (#323) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Chris Sewell --- .pre-commit-config.yaml | 6 +++--- markdown_it/__init__.py | 1 + markdown_it/cli/parse.py | 1 + markdown_it/common/entities.py | 1 + markdown_it/common/html_re.py | 3 +-- markdown_it/common/utils.py | 4 ++-- markdown_it/helpers/__init__.py | 4 ++-- markdown_it/helpers/parse_link_label.py | 1 + markdown_it/helpers/parse_link_title.py | 4 ++-- markdown_it/main.py | 15 +++++---------- markdown_it/parser_block.py | 1 + markdown_it/parser_core.py | 9 +++++---- markdown_it/parser_inline.py | 4 ++-- markdown_it/presets/commonmark.py | 1 + markdown_it/presets/default.py | 1 + markdown_it/presets/zero.py | 1 + markdown_it/renderer.py | 4 ++-- markdown_it/ruler.py | 1 + markdown_it/rules_block/code.py | 1 + markdown_it/rules_block/heading.py | 3 ++- markdown_it/rules_block/hr.py | 1 + markdown_it/rules_block/paragraph.py | 1 + markdown_it/rules_core/normalize.py | 3 ++- markdown_it/rules_core/replacements.py | 1 + markdown_it/rules_core/smartquotes.py | 4 ++-- markdown_it/rules_core/text_join.py | 1 + markdown_it/rules_inline/balance_pairs.py | 1 + markdown_it/rules_inline/escape.py | 1 + markdown_it/rules_inline/link.py | 4 +--- markdown_it/rules_inline/linkify.py | 1 + markdown_it/rules_inline/newline.py | 1 + markdown_it/tree.py | 7 +++---- pyproject.toml | 2 +- scripts/build_fuzzers.py | 1 + scripts/profiler.py | 1 + tests/test_api/test_plugin_creation.py | 1 + tests/test_cmark_spec/test_spec.py | 1 + tests/test_fuzzer.py | 1 + 38 files changed, 58 insertions(+), 41 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 822f02f8..a35912c7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,7 +16,7 @@ exclude: > repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v4.6.0 hooks: - id: check-json - id: check-yaml @@ -24,14 +24,14 @@ repos: - id: trailing-whitespace - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.11 + rev: v0.4.3 hooks: - id: ruff args: [--fix] - id: ruff-format - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.8.0 + rev: v1.10.0 hooks: - id: mypy additional_dependencies: [mdurl] diff --git a/markdown_it/__init__.py b/markdown_it/__init__.py index 6606868a..399c6b70 100644 --- a/markdown_it/__init__.py +++ b/markdown_it/__init__.py @@ -1,4 +1,5 @@ """A Python port of Markdown-It""" + __all__ = ("MarkdownIt",) __version__ = "3.0.0" diff --git a/markdown_it/cli/parse.py b/markdown_it/cli/parse.py index bfd3449d..fe346b2f 100644 --- a/markdown_it/cli/parse.py +++ b/markdown_it/cli/parse.py @@ -4,6 +4,7 @@ Parse one or more markdown files, convert each to HTML, and print to stdout. 
""" + from __future__ import annotations import argparse diff --git a/markdown_it/common/entities.py b/markdown_it/common/entities.py index 6bb2d343..14d08ec9 100644 --- a/markdown_it/common/entities.py +++ b/markdown_it/common/entities.py @@ -1,4 +1,5 @@ """HTML5 entities map: { name -> characters }.""" + import html.entities entities = {name.rstrip(";"): chars for name, chars in html.entities.html5.items()} diff --git a/markdown_it/common/html_re.py b/markdown_it/common/html_re.py index f0c336d2..dae052e9 100644 --- a/markdown_it/common/html_re.py +++ b/markdown_it/common/html_re.py @@ -1,5 +1,4 @@ -"""Regexps to match html elements -""" +"""Regexps to match html elements""" import re diff --git a/markdown_it/common/utils.py b/markdown_it/common/utils.py index dbe082a1..0dafa2d6 100644 --- a/markdown_it/common/utils.py +++ b/markdown_it/common/utils.py @@ -1,5 +1,5 @@ -"""Utilities for parsing source text -""" +"""Utilities for parsing source text""" + from __future__ import annotations import re diff --git a/markdown_it/helpers/__init__.py b/markdown_it/helpers/__init__.py index 3dbbdd1d..bcf2dc21 100644 --- a/markdown_it/helpers/__init__.py +++ b/markdown_it/helpers/__init__.py @@ -1,5 +1,5 @@ -"""Functions for parsing Links -""" +"""Functions for parsing Links""" + __all__ = ("parseLinkLabel", "parseLinkDestination", "parseLinkTitle") from .parse_link_destination import parseLinkDestination from .parse_link_label import parseLinkLabel diff --git a/markdown_it/helpers/parse_link_label.py b/markdown_it/helpers/parse_link_label.py index 01c653c5..c80da5a7 100644 --- a/markdown_it/helpers/parse_link_label.py +++ b/markdown_it/helpers/parse_link_label.py @@ -5,6 +5,7 @@ returns the end of the label """ + from markdown_it.rules_inline import StateInline diff --git a/markdown_it/helpers/parse_link_title.py b/markdown_it/helpers/parse_link_title.py index 8f589336..fe23ea71 100644 --- a/markdown_it/helpers/parse_link_title.py +++ b/markdown_it/helpers/parse_link_title.py @@ -1,5 +1,5 @@ -"""Parse link title -""" +"""Parse link title""" + from ..common.utils import charCodeAt, unescapeAll diff --git a/markdown_it/main.py b/markdown_it/main.py index bb294a99..bf9fd18f 100644 --- a/markdown_it/main.py +++ b/markdown_it/main.py @@ -68,24 +68,19 @@ def __repr__(self) -> str: return f"{self.__class__.__module__}.{self.__class__.__name__}()" @overload - def __getitem__(self, name: Literal["inline"]) -> ParserInline: - ... + def __getitem__(self, name: Literal["inline"]) -> ParserInline: ... @overload - def __getitem__(self, name: Literal["block"]) -> ParserBlock: - ... + def __getitem__(self, name: Literal["block"]) -> ParserBlock: ... @overload - def __getitem__(self, name: Literal["core"]) -> ParserCore: - ... + def __getitem__(self, name: Literal["core"]) -> ParserCore: ... @overload - def __getitem__(self, name: Literal["renderer"]) -> RendererProtocol: - ... + def __getitem__(self, name: Literal["renderer"]) -> RendererProtocol: ... @overload - def __getitem__(self, name: str) -> Any: - ... + def __getitem__(self, name: str) -> Any: ... 
def __getitem__(self, name: str) -> Any: return { diff --git a/markdown_it/parser_block.py b/markdown_it/parser_block.py index 72360f9b..3c4d4019 100644 --- a/markdown_it/parser_block.py +++ b/markdown_it/parser_block.py @@ -1,4 +1,5 @@ """Block-level tokenizer.""" + from __future__ import annotations import logging diff --git a/markdown_it/parser_core.py b/markdown_it/parser_core.py index ca5ab256..77075098 100644 --- a/markdown_it/parser_core.py +++ b/markdown_it/parser_core.py @@ -1,9 +1,10 @@ """ - * class Core - * - * Top-level rules executor. Glues block/inline parsers and does intermediate - * transformations. +* class Core +* +* Top-level rules executor. Glues block/inline parsers and does intermediate +* transformations. """ + from __future__ import annotations from typing import Callable diff --git a/markdown_it/parser_inline.py b/markdown_it/parser_inline.py index 0026c383..8f3ac1e6 100644 --- a/markdown_it/parser_inline.py +++ b/markdown_it/parser_inline.py @@ -1,5 +1,5 @@ -"""Tokenizes paragraph content. -""" +"""Tokenizes paragraph content.""" + from __future__ import annotations from typing import TYPE_CHECKING, Callable diff --git a/markdown_it/presets/commonmark.py b/markdown_it/presets/commonmark.py index 3990d434..ed0de0fe 100644 --- a/markdown_it/presets/commonmark.py +++ b/markdown_it/presets/commonmark.py @@ -6,6 +6,7 @@ - block: table - inline: strikethrough """ + from ..utils import PresetType diff --git a/markdown_it/presets/default.py b/markdown_it/presets/default.py index c9ab902d..8aa858f7 100644 --- a/markdown_it/presets/default.py +++ b/markdown_it/presets/default.py @@ -1,4 +1,5 @@ """markdown-it default options.""" + from ..utils import PresetType diff --git a/markdown_it/presets/zero.py b/markdown_it/presets/zero.py index 2f69a58d..3f1fc18c 100644 --- a/markdown_it/presets/zero.py +++ b/markdown_it/presets/zero.py @@ -2,6 +2,7 @@ "Zero" preset, with nothing enabled. Useful for manual configuring of simple modes. For example, to parse bold/italic only. """ + from ..utils import PresetType diff --git a/markdown_it/renderer.py b/markdown_it/renderer.py index 5a774d06..6d60589a 100644 --- a/markdown_it/renderer.py +++ b/markdown_it/renderer.py @@ -5,6 +5,7 @@ class Renderer copy of rules. Those can be rewritten with ease. Also, you can add new rules if you create plugin and adds new token types. """ + from __future__ import annotations from collections.abc import Sequence @@ -21,8 +22,7 @@ class RendererProtocol(Protocol): def render( self, tokens: Sequence[Token], options: OptionsDict, env: EnvType - ) -> Any: - ... + ) -> Any: ... class RendererHTML(RendererProtocol): diff --git a/markdown_it/ruler.py b/markdown_it/ruler.py index bd8baba3..711edce7 100644 --- a/markdown_it/ruler.py +++ b/markdown_it/ruler.py @@ -15,6 +15,7 @@ class Ruler rules control use [[MarkdownIt.disable]], [[MarkdownIt.enable]] and [[MarkdownIt.use]]. """ + from __future__ import annotations from collections.abc import Iterable diff --git a/markdown_it/rules_block/code.py b/markdown_it/rules_block/code.py index 89db9cec..af8a41c8 100644 --- a/markdown_it/rules_block/code.py +++ b/markdown_it/rules_block/code.py @@ -1,4 +1,5 @@ """Code block (4 spaces padded).""" + import logging from .state_block import StateBlock diff --git a/markdown_it/rules_block/heading.py b/markdown_it/rules_block/heading.py index 850ffb50..afcf9ed4 100644 --- a/markdown_it/rules_block/heading.py +++ b/markdown_it/rules_block/heading.py @@ -1,4 +1,5 @@ -""" Atex heading (#, ##, ...) 
""" +"""Atex heading (#, ##, ...)""" + from __future__ import annotations import logging diff --git a/markdown_it/rules_block/hr.py b/markdown_it/rules_block/hr.py index 16df05f2..fca7d79d 100644 --- a/markdown_it/rules_block/hr.py +++ b/markdown_it/rules_block/hr.py @@ -2,6 +2,7 @@ At least 3 of these characters on a line * - _ """ + import logging from ..common.utils import isStrSpace diff --git a/markdown_it/rules_block/paragraph.py b/markdown_it/rules_block/paragraph.py index 5388a4b1..30ba8777 100644 --- a/markdown_it/rules_block/paragraph.py +++ b/markdown_it/rules_block/paragraph.py @@ -1,4 +1,5 @@ """Paragraph.""" + import logging from .state_block import StateBlock diff --git a/markdown_it/rules_core/normalize.py b/markdown_it/rules_core/normalize.py index c9f8d0d5..32439243 100644 --- a/markdown_it/rules_core/normalize.py +++ b/markdown_it/rules_core/normalize.py @@ -1,4 +1,5 @@ """Normalize input string.""" + import re from .state_core import StateCore @@ -13,6 +14,6 @@ def normalize(state: StateCore) -> None: string = NEWLINES_RE.sub("\n", state.src) # Replace NULL characters - string = NULL_RE.sub("\uFFFD", string) + string = NULL_RE.sub("\ufffd", string) state.src = string diff --git a/markdown_it/rules_core/replacements.py b/markdown_it/rules_core/replacements.py index 14912e17..bcc99800 100644 --- a/markdown_it/rules_core/replacements.py +++ b/markdown_it/rules_core/replacements.py @@ -13,6 +13,7 @@ * ``--`` → &ndash * ``---`` → &mdash """ + from __future__ import annotations import logging diff --git a/markdown_it/rules_core/smartquotes.py b/markdown_it/rules_core/smartquotes.py index c98fbd71..f9b8b457 100644 --- a/markdown_it/rules_core/smartquotes.py +++ b/markdown_it/rules_core/smartquotes.py @@ -1,5 +1,5 @@ -"""Convert straight quotation marks to typographic ones -""" +"""Convert straight quotation marks to typographic ones""" + from __future__ import annotations import re diff --git a/markdown_it/rules_core/text_join.py b/markdown_it/rules_core/text_join.py index d54ccbbc..5379f6d7 100644 --- a/markdown_it/rules_core/text_join.py +++ b/markdown_it/rules_core/text_join.py @@ -5,6 +5,7 @@ For example, `\\:)` shouldn't be replaced with an emoji. 
""" + from __future__ import annotations from ..token import Token diff --git a/markdown_it/rules_inline/balance_pairs.py b/markdown_it/rules_inline/balance_pairs.py index bbb2101c..9c63b27f 100644 --- a/markdown_it/rules_inline/balance_pairs.py +++ b/markdown_it/rules_inline/balance_pairs.py @@ -1,4 +1,5 @@ """Balance paired characters (*, _, etc) in inline tokens.""" + from __future__ import annotations from .state_inline import Delimiter, StateInline diff --git a/markdown_it/rules_inline/escape.py b/markdown_it/rules_inline/escape.py index 9f68b5df..0fca6c84 100644 --- a/markdown_it/rules_inline/escape.py +++ b/markdown_it/rules_inline/escape.py @@ -1,6 +1,7 @@ """ Process escaped chars and hardbreaks """ + from ..common.utils import isStrSpace from .state_inline import StateInline diff --git a/markdown_it/rules_inline/link.py b/markdown_it/rules_inline/link.py index 78cf9122..2e92c7d8 100644 --- a/markdown_it/rules_inline/link.py +++ b/markdown_it/rules_inline/link.py @@ -112,9 +112,7 @@ def link(state: StateInline, silent: bool) -> bool: label = normalizeReference(label) - ref = ( - state.env["references"][label] if label in state.env["references"] else None - ) + ref = state.env["references"].get(label, None) if not ref: state.pos = oldPos return False diff --git a/markdown_it/rules_inline/linkify.py b/markdown_it/rules_inline/linkify.py index 88224e78..3669396e 100644 --- a/markdown_it/rules_inline/linkify.py +++ b/markdown_it/rules_inline/linkify.py @@ -1,4 +1,5 @@ """Process links like https://example.org/""" + import re from .state_inline import StateInline diff --git a/markdown_it/rules_inline/newline.py b/markdown_it/rules_inline/newline.py index ca8f1db0..d05ee6da 100644 --- a/markdown_it/rules_inline/newline.py +++ b/markdown_it/rules_inline/newline.py @@ -1,4 +1,5 @@ """Proceess '\n'.""" + from ..common.utils import charStrAt, isStrSpace from .state_inline import StateInline diff --git a/markdown_it/tree.py b/markdown_it/tree.py index 6641e5a4..7e775204 100644 --- a/markdown_it/tree.py +++ b/markdown_it/tree.py @@ -2,6 +2,7 @@ This module is not part of upstream JavaScript markdown-it. """ + from __future__ import annotations from collections.abc import Generator, Sequence @@ -78,12 +79,10 @@ def __repr__(self) -> str: return f"{type(self).__name__}({self.type})" @overload - def __getitem__(self: _NodeType, item: int) -> _NodeType: - ... + def __getitem__(self: _NodeType, item: int) -> _NodeType: ... @overload - def __getitem__(self: _NodeType, item: slice) -> list[_NodeType]: - ... + def __getitem__(self: _NodeType, item: slice) -> list[_NodeType]: ... 
def __getitem__(self: _NodeType, item: int | slice) -> _NodeType | list[_NodeType]: return self.children[item] diff --git a/pyproject.toml b/pyproject.toml index 8a186031..09e696cd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,7 +83,7 @@ exclude = [ "benchmarking/" ] -[tool.ruff] +[tool.ruff.lint] extend-select = ["B0", "C4", "I", "ICN", "ISC", "N", "RUF", "SIM", "UP"] extend-ignore = ["ISC001", "ISC003", "N802", "N803", "N806", "N816", "RUF003"] diff --git a/scripts/build_fuzzers.py b/scripts/build_fuzzers.py index 3dce8ddf..00cc1198 100644 --- a/scripts/build_fuzzers.py +++ b/scripts/build_fuzzers.py @@ -1,4 +1,5 @@ """Build fuzzers idempotently in a given folder.""" + import argparse from pathlib import Path import subprocess diff --git a/scripts/profiler.py b/scripts/profiler.py index a593baa1..bdee697c 100644 --- a/scripts/profiler.py +++ b/scripts/profiler.py @@ -4,6 +4,7 @@ - `tox -e profile` - `firefox .tox/prof/output.svg` """ + from pathlib import Path from markdown_it import MarkdownIt diff --git a/tests/test_api/test_plugin_creation.py b/tests/test_api/test_plugin_creation.py index d5bda748..d555be18 100644 --- a/tests/test_api/test_plugin_creation.py +++ b/tests/test_api/test_plugin_creation.py @@ -1,6 +1,7 @@ """Test basic plugin creation functionality: that they can be added and are called correctly """ + from markdown_it import MarkdownIt diff --git a/tests/test_cmark_spec/test_spec.py b/tests/test_cmark_spec/test_spec.py index 88d9fca7..e5199477 100644 --- a/tests/test_cmark_spec/test_spec.py +++ b/tests/test_cmark_spec/test_spec.py @@ -1,6 +1,7 @@ """In this module tests are run against the full test set, provided by https://github.com/commonmark/CommonMark.git. """ + import json from pathlib import Path diff --git a/tests/test_fuzzer.py b/tests/test_fuzzer.py index f3666cc5..7286f8ea 100644 --- a/tests/test_fuzzer.py +++ b/tests/test_fuzzer.py @@ -5,6 +5,7 @@ In the future, perhaps atheris could be directly used here, but it was not directly apparent how to integrate it into pytest. 
""" + import pytest from markdown_it import MarkdownIt From a4793161da298cb5555cab48cd8d2416fbdf8c48 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Sat, 30 Nov 2024 04:03:42 +0000 Subject: [PATCH 77/97] =?UTF-8?q?=F0=9F=94=A7=20Update=20pre-commit=20(#34?= =?UTF-8?q?1)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 6 +++--- markdown_it/common/utils.py | 4 +--- markdown_it/helpers/__init__.py | 2 +- markdown_it/helpers/parse_link_destination.py | 2 +- markdown_it/helpers/parse_link_title.py | 2 +- markdown_it/presets/__init__.py | 2 +- markdown_it/rules_block/__init__.py | 10 +++++----- markdown_it/rules_core/__init__.py | 4 ++-- markdown_it/rules_inline/__init__.py | 18 +++++++++--------- 9 files changed, 24 insertions(+), 26 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a35912c7..d6005f6e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,7 +16,7 @@ exclude: > repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.6.0 + rev: v5.0.0 hooks: - id: check-json - id: check-yaml @@ -24,14 +24,14 @@ repos: - id: trailing-whitespace - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.4.3 + rev: v0.8.1 hooks: - id: ruff args: [--fix] - id: ruff-format - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.10.0 + rev: v1.13.0 hooks: - id: mypy additional_dependencies: [mdurl] diff --git a/markdown_it/common/utils.py b/markdown_it/common/utils.py index 0dafa2d6..59812a80 100644 --- a/markdown_it/common/utils.py +++ b/markdown_it/common/utils.py @@ -70,9 +70,7 @@ def isValidEntityCode(c: int) -> bool: if c >= 0x7F and c <= 0x9F: return False # out of range - if c > 0x10FFFF: - return False - return True + return not (c > 0x10FFFF) def fromCodePoint(c: int) -> str: diff --git a/markdown_it/helpers/__init__.py b/markdown_it/helpers/__init__.py index bcf2dc21..f4e2cd21 100644 --- a/markdown_it/helpers/__init__.py +++ b/markdown_it/helpers/__init__.py @@ -1,6 +1,6 @@ """Functions for parsing Links""" -__all__ = ("parseLinkLabel", "parseLinkDestination", "parseLinkTitle") +__all__ = ("parseLinkDestination", "parseLinkLabel", "parseLinkTitle") from .parse_link_destination import parseLinkDestination from .parse_link_label import parseLinkLabel from .parse_link_title import parseLinkTitle diff --git a/markdown_it/helpers/parse_link_destination.py b/markdown_it/helpers/parse_link_destination.py index f42b2244..93989eb5 100644 --- a/markdown_it/helpers/parse_link_destination.py +++ b/markdown_it/helpers/parse_link_destination.py @@ -6,7 +6,7 @@ class _Result: - __slots__ = ("ok", "pos", "lines", "str") + __slots__ = ("lines", "ok", "pos", "str") def __init__(self) -> None: self.ok = False diff --git a/markdown_it/helpers/parse_link_title.py b/markdown_it/helpers/parse_link_title.py index fe23ea71..f002c7c4 100644 --- a/markdown_it/helpers/parse_link_title.py +++ b/markdown_it/helpers/parse_link_title.py @@ -4,7 +4,7 @@ class _Result: - __slots__ = ("ok", "pos", "lines", "str") + __slots__ = ("lines", "ok", "pos", "str") def __init__(self) -> None: self.ok = False diff --git a/markdown_it/presets/__init__.py b/markdown_it/presets/__init__.py index c3fb9e9b..e21c7806 100644 --- a/markdown_it/presets/__init__.py +++ b/markdown_it/presets/__init__.py @@ -1,4 +1,4 @@ -__all__ = ("commonmark", "default", "zero", "js_default", "gfm_like") +__all__ = ("commonmark", "default", "gfm_like", "js_default", "zero") from ..utils import PresetType from . 
import commonmark, default, zero diff --git a/markdown_it/rules_block/__init__.py b/markdown_it/rules_block/__init__.py index bcf138df..517da231 100644 --- a/markdown_it/rules_block/__init__.py +++ b/markdown_it/rules_block/__init__.py @@ -1,15 +1,15 @@ __all__ = ( "StateBlock", - "paragraph", - "heading", - "lheading", + "blockquote", "code", "fence", + "heading", "hr", + "html_block", + "lheading", "list_block", + "paragraph", "reference", - "blockquote", - "html_block", "table", ) diff --git a/markdown_it/rules_core/__init__.py b/markdown_it/rules_core/__init__.py index c9c5368c..e7d77536 100644 --- a/markdown_it/rules_core/__init__.py +++ b/markdown_it/rules_core/__init__.py @@ -1,11 +1,11 @@ __all__ = ( "StateCore", - "normalize", "block", "inline", + "linkify", + "normalize", "replace", "smartquotes", - "linkify", "text_join", ) diff --git a/markdown_it/rules_inline/__init__.py b/markdown_it/rules_inline/__init__.py index 3a8026ec..d82ef8fb 100644 --- a/markdown_it/rules_inline/__init__.py +++ b/markdown_it/rules_inline/__init__.py @@ -1,19 +1,19 @@ __all__ = ( "StateInline", - "text", - "fragments_join", - "link_pairs", - "linkify", - "escape", - "newline", + "autolink", "backtick", "emphasis", - "image", - "link", - "autolink", "entity", + "escape", + "fragments_join", "html_inline", + "image", + "link", + "link_pairs", + "linkify", + "newline", "strikethrough", + "text", ) from . import emphasis, strikethrough from .autolink import autolink From bbbaba06091a5ab2986fc78213c0470423200d5b Mon Sep 17 00:00:00 2001 From: Murilo Rosa <67339072+mrmurilo75@users.noreply.github.com> Date: Sat, 30 Nov 2024 04:20:45 +0000 Subject: [PATCH 78/97] Improve README (#340) --- README.md | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 43a5da23..eddebb78 100644 --- a/README.md +++ b/README.md @@ -33,21 +33,28 @@ for even more speed! 
## Installation +### PIP + ```bash -conda install -c conda-forge markdown-it-py +pip install markdown-it-py[plugins] ``` -or +or with extras ```bash -pip install markdown-it-py[plugins] +pip install markdown-it-py[linkify,plugins] +``` + +### Conda + +```bash +conda install -c conda-forge markdown-it-py ``` or with extras ```bash conda install -c conda-forge markdown-it-py linkify-it-py mdit-py-plugins -pip install markdown-it-py[linkify,plugins] ``` ## Usage @@ -63,7 +70,7 @@ from mdit_py_plugins.front_matter import front_matter_plugin from mdit_py_plugins.footnote import footnote_plugin md = ( - MarkdownIt('commonmark' ,{'breaks':True,'html':True}) + MarkdownIt('commonmark', {'breaks':True,'html':True}) .use(front_matter_plugin) .use(footnote_plugin) .enable('table') From 21335c8fe628f892ddc4cb7aba4e09e51d1bae2c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 30 Nov 2024 05:33:23 +0100 Subject: [PATCH 79/97] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20Bump=20actions/setup?= =?UTF-8?q?-python=20from=204=20to=205=20(#327)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/benchmark.yml | 2 +- .github/workflows/tests.yml | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 4d5436a2..2a6ab8d1 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -12,7 +12,7 @@ jobs: - uses: actions/checkout@v4 - name: Set up Python 3.8 - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: 3.8 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 12b4c746..a4272ffe 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -21,7 +21,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Set up Python 3.8 - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.8' - uses: pre-commit/action@v3.0.0 @@ -36,7 +36,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} allow-prereleases: true @@ -66,7 +66,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install markdown-it-py @@ -87,7 +87,7 @@ jobs: - uses: actions/checkout@v4 - name: Set up Python 3.8 - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.8' @@ -116,7 +116,7 @@ jobs: - name: Checkout source uses: actions/checkout@v4 - name: Set up Python 3.8 - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.8' - name: install flit From 876c366ba00bf58d53f926628bd559183371a5a9 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Sat, 30 Nov 2024 04:48:33 +0000 Subject: [PATCH 80/97] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20Drop=20Python=203.9,?= =?UTF-8?q?=20test=203.13=20(#342)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/benchmark.yml | 6 +++--- .github/workflows/tests.yml | 21 +++++++++++---------- .readthedocs.yml | 2 +- docs/conf.py | 4 ++-- docs/contributing.md | 2 +- 
markdown_it/common/utils.py | 3 ++- markdown_it/utils.py | 3 ++- pyproject.toml | 8 ++++---- tox.ini | 12 ++++++------ 9 files changed, 32 insertions(+), 29 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 2a6ab8d1..bfa7ff63 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -11,10 +11,10 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Set up Python 3.8 + - name: Set up Python 3.9 uses: actions/setup-python@v5 with: - python-version: 3.8 + python-version: 3.9 - name: install pandoc uses: r-lib/actions/setup-pandoc@v2 @@ -27,7 +27,7 @@ jobs: pip install tox - name: Run package benchmarks - run: tox -e py38-bench-packages -- --benchmark-min-rounds 20 --benchmark-json bench-packages.json + run: tox -e py39-bench-packages -- --benchmark-min-rounds 20 --benchmark-json bench-packages.json # - name: Upload package data # uses: actions/upload-artifact@v3 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a4272ffe..d0ccc219 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -20,18 +20,19 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Set up Python 3.8 + - name: Set up Python 3.9 uses: actions/setup-python@v5 with: - python-version: '3.8' + python-version: '3.9' - uses: pre-commit/action@v3.0.0 tests: runs-on: ubuntu-latest strategy: + fail-fast: false matrix: - python-version: ['pypy-3.8', '3.8', '3.9', '3.10', '3.11', '3.12'] + python-version: ['pypy-3.9', '3.9', '3.10', '3.11', '3.12', '3.13'] steps: - uses: actions/checkout@v4 @@ -48,7 +49,7 @@ jobs: run: | pytest tests/ --cov=markdown_it --cov-report=xml --cov-report=term-missing - name: Upload to Codecov - if: matrix.python-version == '3.8' && github.repository == 'executablebooks/markdown-it-py' + if: matrix.python-version == '3.9' && github.repository == 'executablebooks/markdown-it-py' uses: codecov/codecov-action@v3 with: name: markdown-it-py-pytests @@ -62,7 +63,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.8'] + python-version: ['3.9'] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} @@ -86,10 +87,10 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Set up Python 3.8 + - name: Set up Python 3.9 uses: actions/setup-python@v5 with: - python-version: '3.8' + python-version: '3.9' - name: Install tox run: | @@ -97,7 +98,7 @@ jobs: pip install tox - name: Run benchmark - run: tox -e py38-bench-core -- --benchmark-json bench-core.json + run: tox -e py39-bench-core -- --benchmark-json bench-core.json - name: Upload data uses: actions/upload-artifact@v3 @@ -115,10 +116,10 @@ jobs: steps: - name: Checkout source uses: actions/checkout@v4 - - name: Set up Python 3.8 + - name: Set up Python 3.9 uses: actions/setup-python@v5 with: - python-version: '3.8' + python-version: '3.9' - name: install flit run: | pip install flit~=3.4 diff --git a/.readthedocs.yml b/.readthedocs.yml index 57297d4b..5b09f7d1 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -3,7 +3,7 @@ version: 2 build: os: ubuntu-22.04 tools: - python: "3.8" + python: "3.9" python: install: diff --git a/docs/conf.py b/docs/conf.py index ed38bb50..dc1a28fd 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -48,7 +48,7 @@ nitpick_ignore_regex = [ ("py:.*", name) for name in ( - "_ItemTV", + ".*_ItemTV", ".*_NodeType", ".*Literal.*", ".*_Result", @@ -84,7 +84,7 @@ intersphinx_mapping = { - "python": ("https://docs.python.org/3.8", None), + "python": 
("https://docs.python.org/3.9", None), "mdit-py-plugins": ("https://mdit-py-plugins.readthedocs.io/en/latest/", None), } diff --git a/docs/contributing.md b/docs/contributing.md index 8b46e678..b2302046 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -50,7 +50,7 @@ This can also be used to run benchmarking tests using [pytest-benchmark](https:/ ```shell >> cd markdown-it-py -tox -e py38-bench-packages -- --benchmark-min-rounds 50 +tox -e py39-bench-packages -- --benchmark-min-rounds 50 ``` For documentation build tests: diff --git a/markdown_it/common/utils.py b/markdown_it/common/utils.py index 59812a80..fedae7e1 100644 --- a/markdown_it/common/utils.py +++ b/markdown_it/common/utils.py @@ -3,7 +3,8 @@ from __future__ import annotations import re -from typing import Match, TypeVar +from re import Match +from typing import TypeVar from .entities import entities diff --git a/markdown_it/utils.py b/markdown_it/utils.py index a9793720..86cfee7f 100644 --- a/markdown_it/utils.py +++ b/markdown_it/utils.py @@ -1,8 +1,9 @@ from __future__ import annotations +from collections.abc import Iterable, MutableMapping from collections.abc import MutableMapping as MutableMappingABC from pathlib import Path -from typing import Any, Callable, Iterable, MutableMapping, TypedDict, cast +from typing import Any, Callable, TypedDict, cast EnvType = MutableMapping[str, Any] # note: could use TypeAlias in python 3.10 """Type for the environment sandbox used in parsing and rendering, diff --git a/pyproject.toml b/pyproject.toml index 09e696cd..aa13fadf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,18 +14,18 @@ classifiers = [ "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Text Processing :: Markup", ] keywords = ["markdown", "lexer", "parser", "commonmark", "markdown-it"] -requires-python = ">=3.8" +requires-python = ">=3.9" dependencies = [ "mdurl~=0.1", ] diff --git a/tox.ini b/tox.ini index c2915b32..b17bd9d4 100644 --- a/tox.ini +++ b/tox.ini @@ -4,18 +4,18 @@ # then run `tox` or `tox -- {pytest args}` # run in parallel using `tox -p` [tox] -envlist = py38 +envlist = py39 [testenv] usedevelop = true -[testenv:py{38,39,310,311,312}] +[testenv:py{39,310,311,312,313}] extras = linkify testing commands = pytest {posargs:tests/} -[testenv:py{38,39,310,311}-plugins] +[testenv:py{39,310,311,312,313}-plugins] extras = testing changedir = {envtmpdir} allowlist_externals = @@ -27,17 +27,17 @@ commands_pre = commands = pytest {posargs} -[testenv:py{38,39,310,311,312}-bench-core] +[testenv:py{39,310,311,312,313}-bench-core] extras = benchmarking commands = pytest benchmarking/bench_core.py {posargs} -[testenv:py{38,39,310,311,312}-bench-packages] +[testenv:py{39,310,311,312,313}-bench-packages] extras = benchmarking,compare commands = pytest benchmarking/bench_packages.py {posargs} [testenv:docs-{update,clean}] extras = linkify,plugins,rtd -whitelist_externals = 
+allowlist_externals = echo rm setenv = From aa4e28f8030831c3fd865daf8294decb9d34dd09 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Sat, 30 Nov 2024 05:50:00 +0000 Subject: [PATCH 81/97] =?UTF-8?q?=F0=9F=94=A7=20Add=20"store=5Flabels"=20t?= =?UTF-8?q?o=20OptionsType=20(#343)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: hukkin --- .gitignore | 2 ++ .pre-commit-config.yaml | 2 +- docs/conf.py | 1 + docs/contributing.md | 2 +- markdown_it/utils.py | 11 ++++++++++- pyproject.toml | 5 ++++- 6 files changed, 19 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 546e6c2a..0c136bb6 100644 --- a/.gitignore +++ b/.gitignore @@ -141,3 +141,5 @@ __pycache__/ .DS_Store docs/api/ + +uv.lock diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d6005f6e..ebc6ab1d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -34,7 +34,7 @@ repos: rev: v1.13.0 hooks: - id: mypy - additional_dependencies: [mdurl] + additional_dependencies: [mdurl, typing-extensions] exclude: > (?x)^( benchmarking/.*\.py| diff --git a/docs/conf.py b/docs/conf.py index dc1a28fd..3a40249d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -55,6 +55,7 @@ "EnvType", "Path", "Ellipsis", + "NotRequired", ) ] diff --git a/docs/contributing.md b/docs/contributing.md index b2302046..eeb65e1c 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -32,7 +32,7 @@ All functions and class methods should be annotated with types and include a doc ## Testing -For code tests, markdown-it-py uses [pytest](https://docs.pytest.org)): +For code tests, markdown-it-py uses [pytest](https://docs.pytest.org): ```shell >> cd markdown-it-py diff --git a/markdown_it/utils.py b/markdown_it/utils.py index 86cfee7f..3d2a20e5 100644 --- a/markdown_it/utils.py +++ b/markdown_it/utils.py @@ -3,7 +3,11 @@ from collections.abc import Iterable, MutableMapping from collections.abc import MutableMapping as MutableMappingABC from pathlib import Path -from typing import Any, Callable, TypedDict, cast +from typing import TYPE_CHECKING, Any, Callable, TypedDict, cast + +if TYPE_CHECKING: + from typing_extensions import NotRequired + EnvType = MutableMapping[str, Any] # note: could use TypeAlias in python 3.10 """Type for the environment sandbox used in parsing and rendering, @@ -32,6 +36,11 @@ class OptionsType(TypedDict): """CSS language prefix for fenced blocks.""" highlight: Callable[[str, str, str], str] | None """Highlighter function: (content, lang, attrs) -> str.""" + store_labels: NotRequired[bool] + """Store link label in link/image token's metadata (under Token.meta['label']). + + This is a Python only option, and is intended for the use of round-trip parsing. 
+ """ class PresetType(TypedDict): diff --git a/pyproject.toml b/pyproject.toml index aa13fadf..d46668a0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,6 +70,9 @@ benchmarking = [ ] profiling = ["gprof2dot"] +[dependency-groups] +mypy = ["mypy", "mdurl", "typing-extensions"] + [project.scripts] markdown-it = "markdown_it.cli.parse:main" @@ -106,7 +109,7 @@ module = ["tests.test_plugins.*", "markdown.*"] ignore_errors = true [[tool.mypy.overrides]] -module = ["markdown.*"] +module = ["markdown.*", "linkify_it.*"] ignore_missing_imports = true [tool.pytest.ini_options] From d86e9a0dd3e13f522493f3aba9c260929f58cd6c Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Sat, 30 Nov 2024 06:06:21 +0000 Subject: [PATCH 82/97] =?UTF-8?q?=F0=9F=94=A7=20Move=20`code=5Fstyle`=20to?= =?UTF-8?q?=20dependency=20group=20(#344)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d46668a0..5cb7012d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,6 @@ Homepage = "https://github.com/executablebooks/markdown-it-py" Documentation = "https://markdown-it-py.readthedocs.io/" [project.optional-dependencies] -code_style = ["pre-commit~=3.0"] compare = [ "commonmark~=0.9", "markdown~=3.4", @@ -71,6 +70,7 @@ benchmarking = [ profiling = ["gprof2dot"] [dependency-groups] +pre_commit = ["pre-commit"] mypy = ["mypy", "mdurl", "typing-extensions"] [project.scripts] From 77a7ee6c9b3ef75271bccc79a87820dfe49fd9e2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 30 Nov 2024 07:30:03 +0100 Subject: [PATCH 83/97] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20Bump=20pre-commit/ac?= =?UTF-8?q?tion=20from=203.0.0=20to=203.0.1=20(#328)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index d0ccc219..cd2a5576 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -24,7 +24,7 @@ jobs: uses: actions/setup-python@v5 with: python-version: '3.9' - - uses: pre-commit/action@v3.0.0 + - uses: pre-commit/action@v3.0.1 tests: From 1a43fa3e9f0698b7486b74ce3dfc16558bf9ba07 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Sat, 30 Nov 2024 06:42:33 +0000 Subject: [PATCH 84/97] =?UTF-8?q?=F0=9F=94=A7=20Update=20codecov=20action?= =?UTF-8?q?=20(#345)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/tests.yml | 4 ++-- README.md | 3 --- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index cd2a5576..93bb9cae 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -50,11 +50,11 @@ jobs: pytest tests/ --cov=markdown_it --cov-report=xml --cov-report=term-missing - name: Upload to Codecov if: matrix.python-version == '3.9' && github.repository == 'executablebooks/markdown-it-py' - uses: codecov/codecov-action@v3 + uses: codecov/codecov-action@v5 with: name: markdown-it-py-pytests flags: pytests - file: ./coverage.xml + files: ./coverage.xml fail_ci_if_error: true token: ${{ secrets.CODECOV_TOKEN }} diff --git a/README.md b/README.md index 
eddebb78..b94729f8 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,6 @@ [![Coverage Status][codecov-badge]][codecov-link] [![PyPI][pypi-badge]][pypi-link] [![Conda][conda-badge]][conda-link] -[![Code style: black][black-badge]][black-link] [![PyPI - Downloads][install-badge]][install-link]

    @@ -149,8 +148,6 @@ Also [John MacFarlane](https://github.com/jgm) for his work on the CommonMark sp [conda-link]: https://anaconda.org/conda-forge/markdown-it-py [codecov-badge]: https://codecov.io/gh/executablebooks/markdown-it-py/branch/master/graph/badge.svg [codecov-link]: https://codecov.io/gh/executablebooks/markdown-it-py -[black-badge]: https://img.shields.io/badge/code%20style-black-000000.svg -[black-link]: https://github.com/ambv/black [install-badge]: https://img.shields.io/pypi/dw/markdown-it-py?label=pypi%20installs [install-link]: https://pypistats.org/packages/markdown-it-py From c5161b550f3c6c0a98d77e8389872405e8f9f9ee Mon Sep 17 00:00:00 2001 From: Taneli Hukkinen <3275109+hukkin@users.noreply.github.com> Date: Tue, 17 Dec 2024 17:38:01 +0200 Subject: [PATCH 85/97] =?UTF-8?q?=F0=9F=91=8C=20Improve=20performance=20of?= =?UTF-8?q?=20"text"=20inline=20rule=20(#347)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- markdown_it/rules_inline/text.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/markdown_it/rules_inline/text.py b/markdown_it/rules_inline/text.py index f306b2e4..18b2fcc7 100644 --- a/markdown_it/rules_inline/text.py +++ b/markdown_it/rules_inline/text.py @@ -1,3 +1,6 @@ +import functools +import re + # Skip text characters for text token, place those to pending buffer # and increment current pos from .state_inline import StateInline @@ -36,11 +39,17 @@ } +@functools.cache +def _terminator_char_regex() -> re.Pattern[str]: + return re.compile("[" + re.escape("".join(_TerminatorChars)) + "]") + + def text(state: StateInline, silent: bool) -> bool: pos = state.pos posMax = state.posMax - while (pos < posMax) and state.src[pos] not in _TerminatorChars: - pos += 1 + + terminator_char = _terminator_char_regex().search(state.src, pos) + pos = terminator_char.start() if terminator_char else posMax if pos == state.pos: return False From 0c933dce5e6422490a0e4807f844522bfac41ac0 Mon Sep 17 00:00:00 2001 From: Taneli Hukkinen <3275109+hukkin@users.noreply.github.com> Date: Wed, 18 Dec 2024 15:24:19 +0200 Subject: [PATCH 86/97] =?UTF-8?q?=F0=9F=94=A7=20Use=20`str.removesuffix`?= =?UTF-8?q?=20(#348)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- markdown_it/tree.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/markdown_it/tree.py b/markdown_it/tree.py index 7e775204..5369157b 100644 --- a/markdown_it/tree.py +++ b/markdown_it/tree.py @@ -162,7 +162,7 @@ def type(self) -> str: if self.token: return self.token.type assert self.nester_tokens - return _removesuffix(self.nester_tokens.opening.type, "_open") + return self.nester_tokens.opening.type.removesuffix("_open") @property def next_sibling(self: _NodeType) -> _NodeType | None: @@ -331,14 +331,3 @@ def hidden(self) -> bool: """If it's true, ignore this element when rendering. Used for tight lists to hide paragraphs.""" return self._attribute_token().hidden - - -def _removesuffix(string: str, suffix: str) -> str: - """Remove a suffix from a string. - - Replace this with str.removesuffix() from stdlib when minimum Python - version is 3.9. 
- """ - if suffix and string.endswith(suffix): - return string[: -len(suffix)] - return string From 36a9d146af52265420de634cc2e25d1d40cfcdb7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 27 Dec 2024 20:36:16 +0100 Subject: [PATCH 87/97] [pre-commit.ci] pre-commit autoupdate (#346) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ebc6ab1d..7d075425 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,14 +24,14 @@ repos: - id: trailing-whitespace - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.8.1 + rev: v0.8.4 hooks: - id: ruff args: [--fix] - id: ruff-format - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.13.0 + rev: v1.14.0 hooks: - id: mypy additional_dependencies: [mdurl, typing-extensions] From 154fe43feb17947e2c933d0bb3e26618129909a8 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 8 Aug 2025 13:01:37 +0200 Subject: [PATCH 88/97] =?UTF-8?q?=F0=9F=94=A7=20Update=20pre-commit=20and?= =?UTF-8?q?=20upload-artifact=20(#359)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/fuzz.yml | 2 +- .github/workflows/tests.yml | 2 +- .pre-commit-config.yaml | 4 ++-- .readthedocs.yml | 1 + 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml index a74869a5..5c5ed478 100644 --- a/.github/workflows/fuzz.yml +++ b/.github/workflows/fuzz.yml @@ -29,7 +29,7 @@ jobs: language: python fuzz-seconds: 60 - name: Upload Crash - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 if: failure() && steps.build.outcome == 'success' with: name: artifacts diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 93bb9cae..4d6bf50a 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -101,7 +101,7 @@ jobs: run: tox -e py39-bench-core -- --benchmark-json bench-core.json - name: Upload data - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: bench-core path: bench-core.json diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7d075425..33f30b12 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,14 +24,14 @@ repos: - id: trailing-whitespace - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.8.4 + rev: v0.12.8 hooks: - id: ruff args: [--fix] - id: ruff-format - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.14.0 + rev: v1.17.1 hooks: - id: mypy additional_dependencies: [mdurl, typing-extensions] diff --git a/.readthedocs.yml b/.readthedocs.yml index 5b09f7d1..1faecd92 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -14,5 +14,6 @@ python: - rtd sphinx: + configuration: docs/conf.py builder: html fail_on_warning: true From 9ffe841832d33264201aaff120349f001a02953a Mon Sep 17 00:00:00 2001 From: Elijah Greenstein <197816462+elijahgreenstein@users.noreply.github.com> Date: Fri, 8 Aug 2025 04:06:45 -0700 Subject: [PATCH 89/97] =?UTF-8?q?=F0=9F=93=9A=20DOCS:=20Fix=20a=20few=20ol?= =?UTF-8?q?d=20URLs=20(#358)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/contributing.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/contributing.md 
b/docs/contributing.md index eeb65e1c..ea774a8e 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -2,7 +2,7 @@ We welcome all contributions! ✨ -See the [EBP Contributing Guide](https://executablebooks.org/en/latest/contributing.html) for general details, and below for guidance specific to markdown-it-py. +See the [EBP Contributing Guide](https://executablebooks.org/en/latest/contribute/) for general details, and below for guidance specific to markdown-it-py. Before continuing, make sure you've read: @@ -96,13 +96,13 @@ in a more convenient way. The right sequence is to split text to several tokens and add link tokens in between. The result will be: `text` + `link_open` + `text` + `link_close` + `text`. -See implementations of [linkify](https://github.com/markdown-it/markdown-it/blob/master/lib/rules_core/linkify.js) and [emoji](https://github.com/markdown-it/markdown-it-emoji/blob/master/lib/replace.js) - those do text token splits. +See implementations of [linkify](https://github.com/markdown-it/markdown-it/blob/master/lib/rules_core/linkify.mjs) and [emoji](https://github.com/markdown-it/markdown-it-emoji/blob/master/lib/replace.mjs) - those do text token splits. __Note:__ Don't try to replace text with HTML markup! That's not secure. ### Why is my inline rule not executed? -The inline parser skips pieces of texts to optimize speed. It stops only on [a small set of chars](https://github.com/markdown-it/markdown-it/blob/master/lib/rules_inline/text.js), which can be tokens. We did not made this list extensible for performance reasons too. +The inline parser skips pieces of texts to optimize speed. It stops only on [a small set of chars](https://github.com/markdown-it/markdown-it/blob/master/lib/rules_inline/text.mjs), which can be tokens. We did not made this list extensible for performance reasons too. If you are absolutely sure that something important is missing there - create a ticket and we will consider adding it as a new charcode. 
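The contributing FAQ above describes splitting a `text` token into `text` + `link_open` + `text` + `link_close` + `text` rather than injecting raw HTML. Below is a minimal sketch of that approach as a core rule; it is not part of the repository, and the rule name (`linkify_todo`), the trigger word and the target URL are invented for illustration. Only the first match per text token is handled.

```python
from markdown_it import MarkdownIt
from markdown_it.rules_core import StateCore
from markdown_it.token import Token


def linkify_todo(state: StateCore) -> None:
    """Wrap the first "TODO" of each text token in a link, by splitting the token."""
    for block_token in state.tokens:
        if block_token.type != "inline" or block_token.children is None:
            continue
        new_children: list[Token] = []
        for child in block_token.children:
            if child.type != "text" or "TODO" not in child.content:
                new_children.append(child)
                continue
            before, _, after = child.content.partition("TODO")
            # Replace one text token with: text + link_open + text + link_close + text
            if before:
                new_children.append(Token("text", "", 0, content=before))
            link_open = Token("link_open", "a", 1)
            link_open.attrs["href"] = "https://example.com/todo"  # invented URL
            new_children.append(link_open)
            new_children.append(Token("text", "", 0, content="TODO"))
            new_children.append(Token("link_close", "a", -1))
            if after:
                new_children.append(Token("text", "", 0, content=after))
        block_token.children = new_children


md = MarkdownIt()
md.core.ruler.push("linkify_todo", linkify_todo)
print(md.render("a TODO goes here"))
# -> <p>a <a href="https://example.com/todo">TODO</a> goes here</p>
```

Pushing the rule onto `md.core.ruler` means it runs after inline parsing, when each `inline` token already carries its `children`, which is why the split is done in a core rule rather than an inline rule.
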
From fb9d3ab646e4f752dc8fd52e1dd7613c350ff190 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 8 Aug 2025 13:20:13 +0200 Subject: [PATCH 90/97] =?UTF-8?q?=20=E2=AC=86=EF=B8=8F=20Drop=20support=20?= =?UTF-8?q?for=20Python=203.9=20(#360)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is EoL next month https://devguide.python.org/versions --- .github/workflows/benchmark.yml | 6 +++--- .github/workflows/tests.yml | 20 ++++++++++---------- .readthedocs.yml | 2 +- docs/conf.py | 2 +- docs/contributing.md | 2 +- markdown_it/_compat.py | 10 ---------- markdown_it/_punycode.py | 2 +- markdown_it/parser_block.py | 3 ++- markdown_it/parser_core.py | 2 +- markdown_it/parser_inline.py | 3 ++- markdown_it/ruler.py | 4 +--- markdown_it/rules_inline/state_inline.py | 3 +-- markdown_it/token.py | 4 +--- markdown_it/utils.py | 4 ++-- pyproject.toml | 3 +-- tox.ini | 10 +++++----- 16 files changed, 33 insertions(+), 47 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index bfa7ff63..9c96c035 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -11,10 +11,10 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Set up Python 3.9 + - name: Set up Python uses: actions/setup-python@v5 with: - python-version: 3.9 + python-version: 3.10 - name: install pandoc uses: r-lib/actions/setup-pandoc@v2 @@ -27,7 +27,7 @@ jobs: pip install tox - name: Run package benchmarks - run: tox -e py39-bench-packages -- --benchmark-min-rounds 20 --benchmark-json bench-packages.json + run: tox -e py310-bench-packages -- --benchmark-min-rounds 20 --benchmark-json bench-packages.json # - name: Upload package data # uses: actions/upload-artifact@v3 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 4d6bf50a..8b0b42b9 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -20,10 +20,10 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Set up Python 3.9 + - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.9' + python-version: '3.10' - uses: pre-commit/action@v3.0.1 tests: @@ -32,7 +32,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['pypy-3.9', '3.9', '3.10', '3.11', '3.12', '3.13'] + python-version: ['pypy-3.10', '3.10', '3.11', '3.12', '3.13'] steps: - uses: actions/checkout@v4 @@ -49,7 +49,7 @@ jobs: run: | pytest tests/ --cov=markdown_it --cov-report=xml --cov-report=term-missing - name: Upload to Codecov - if: matrix.python-version == '3.9' && github.repository == 'executablebooks/markdown-it-py' + if: matrix.python-version == '3.10' && github.repository == 'executablebooks/markdown-it-py' uses: codecov/codecov-action@v5 with: name: markdown-it-py-pytests @@ -63,7 +63,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.9'] + python-version: ['3.10'] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} @@ -87,10 +87,10 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Set up Python 3.9 + - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.9' + python-version: '3.10' - name: Install tox run: | @@ -98,7 +98,7 @@ jobs: pip install tox - name: Run benchmark - run: tox -e py39-bench-core -- --benchmark-json bench-core.json + run: tox -e py310-bench-core -- --benchmark-json bench-core.json - name: Upload data uses: actions/upload-artifact@v4 @@ -116,10 +116,10 @@ jobs: steps: - name: Checkout source uses: actions/checkout@v4 - - 
name: Set up Python 3.9 + - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.9' + python-version: '3.10' - name: install flit run: | pip install flit~=3.4 diff --git a/.readthedocs.yml b/.readthedocs.yml index 1faecd92..cb68e005 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -3,7 +3,7 @@ version: 2 build: os: ubuntu-22.04 tools: - python: "3.9" + python: "3.10" python: install: diff --git a/docs/conf.py b/docs/conf.py index 3a40249d..290eac7f 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -85,7 +85,7 @@ intersphinx_mapping = { - "python": ("https://docs.python.org/3.9", None), + "python": ("https://docs.python.org/3.10", None), "mdit-py-plugins": ("https://mdit-py-plugins.readthedocs.io/en/latest/", None), } diff --git a/docs/contributing.md b/docs/contributing.md index ea774a8e..eb73ccda 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -50,7 +50,7 @@ This can also be used to run benchmarking tests using [pytest-benchmark](https:/ ```shell >> cd markdown-it-py -tox -e py39-bench-packages -- --benchmark-min-rounds 50 +tox -e py310-bench-packages -- --benchmark-min-rounds 50 ``` For documentation build tests: diff --git a/markdown_it/_compat.py b/markdown_it/_compat.py index 974d431b..9d48db4f 100644 --- a/markdown_it/_compat.py +++ b/markdown_it/_compat.py @@ -1,11 +1 @@ from __future__ import annotations - -from collections.abc import Mapping -import sys -from typing import Any - -DATACLASS_KWARGS: Mapping[str, Any] -if sys.version_info >= (3, 10): - DATACLASS_KWARGS = {"slots": True} -else: - DATACLASS_KWARGS = {} diff --git a/markdown_it/_punycode.py b/markdown_it/_punycode.py index f9baad27..312048bf 100644 --- a/markdown_it/_punycode.py +++ b/markdown_it/_punycode.py @@ -21,8 +21,8 @@ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. import codecs +from collections.abc import Callable import re -from typing import Callable REGEX_SEPARATORS = re.compile(r"[\x2E\u3002\uFF0E\uFF61]") REGEX_NON_ASCII = re.compile(r"[^\0-\x7E]") diff --git a/markdown_it/parser_block.py b/markdown_it/parser_block.py index 3c4d4019..50a7184c 100644 --- a/markdown_it/parser_block.py +++ b/markdown_it/parser_block.py @@ -2,8 +2,9 @@ from __future__ import annotations +from collections.abc import Callable import logging -from typing import TYPE_CHECKING, Callable +from typing import TYPE_CHECKING from . import rules_block from .ruler import Ruler diff --git a/markdown_it/parser_core.py b/markdown_it/parser_core.py index 77075098..8f5b921c 100644 --- a/markdown_it/parser_core.py +++ b/markdown_it/parser_core.py @@ -7,7 +7,7 @@ from __future__ import annotations -from typing import Callable +from collections.abc import Callable from .ruler import Ruler from .rules_core import ( diff --git a/markdown_it/parser_inline.py b/markdown_it/parser_inline.py index 8f3ac1e6..26ec2e63 100644 --- a/markdown_it/parser_inline.py +++ b/markdown_it/parser_inline.py @@ -2,7 +2,8 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Callable +from collections.abc import Callable +from typing import TYPE_CHECKING from . 
import rules_inline from .ruler import Ruler diff --git a/markdown_it/ruler.py b/markdown_it/ruler.py index 711edce7..91ab5804 100644 --- a/markdown_it/ruler.py +++ b/markdown_it/ruler.py @@ -23,8 +23,6 @@ class Ruler from typing import TYPE_CHECKING, Generic, TypedDict, TypeVar import warnings -from markdown_it._compat import DATACLASS_KWARGS - from .utils import EnvType if TYPE_CHECKING: @@ -66,7 +64,7 @@ class RuleOptionsType(TypedDict, total=False): """A rule function, whose signature is dependent on the state type.""" -@dataclass(**DATACLASS_KWARGS) +@dataclass(slots=True) class Rule(Generic[RuleFuncTv]): name: str enabled: bool diff --git a/markdown_it/rules_inline/state_inline.py b/markdown_it/rules_inline/state_inline.py index c0c491c4..ca70294a 100644 --- a/markdown_it/rules_inline/state_inline.py +++ b/markdown_it/rules_inline/state_inline.py @@ -4,7 +4,6 @@ from dataclasses import dataclass from typing import TYPE_CHECKING, Any, Literal -from .._compat import DATACLASS_KWARGS from ..common.utils import isMdAsciiPunct, isPunctChar, isWhiteSpace from ..ruler import StateBase from ..token import Token @@ -14,7 +13,7 @@ from markdown_it import MarkdownIt -@dataclass(**DATACLASS_KWARGS) +@dataclass(slots=True) class Delimiter: # Char code of the starting marker (number). marker: int diff --git a/markdown_it/token.py b/markdown_it/token.py index 90008b72..d6d0b453 100644 --- a/markdown_it/token.py +++ b/markdown_it/token.py @@ -5,8 +5,6 @@ from typing import Any, Literal import warnings -from markdown_it._compat import DATACLASS_KWARGS - def convert_attrs(value: Any) -> Any: """Convert Token.attrs set as ``None`` or ``[[key, value], ...]`` to a dict. @@ -20,7 +18,7 @@ def convert_attrs(value: Any) -> Any: return value -@dc.dataclass(**DATACLASS_KWARGS) +@dc.dataclass(slots=True) class Token: type: str """Type of the token (string, e.g. 
"paragraph_open")""" diff --git a/markdown_it/utils.py b/markdown_it/utils.py index 3d2a20e5..2571a158 100644 --- a/markdown_it/utils.py +++ b/markdown_it/utils.py @@ -1,9 +1,9 @@ from __future__ import annotations -from collections.abc import Iterable, MutableMapping +from collections.abc import Callable, Iterable, MutableMapping from collections.abc import MutableMapping as MutableMappingABC from pathlib import Path -from typing import TYPE_CHECKING, Any, Callable, TypedDict, cast +from typing import TYPE_CHECKING, Any, TypedDict, cast if TYPE_CHECKING: from typing_extensions import NotRequired diff --git a/pyproject.toml b/pyproject.toml index 5cb7012d..56bd9df2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,6 @@ classifiers = [ "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", @@ -25,7 +24,7 @@ classifiers = [ "Topic :: Text Processing :: Markup", ] keywords = ["markdown", "lexer", "parser", "commonmark", "markdown-it"] -requires-python = ">=3.9" +requires-python = ">=3.10" dependencies = [ "mdurl~=0.1", ] diff --git a/tox.ini b/tox.ini index b17bd9d4..f8a9b27e 100644 --- a/tox.ini +++ b/tox.ini @@ -4,18 +4,18 @@ # then run `tox` or `tox -- {pytest args}` # run in parallel using `tox -p` [tox] -envlist = py39 +envlist = py310 [testenv] usedevelop = true -[testenv:py{39,310,311,312,313}] +[testenv:py{310,311,312,313}] extras = linkify testing commands = pytest {posargs:tests/} -[testenv:py{39,310,311,312,313}-plugins] +[testenv:py{310,311,312,313}-plugins] extras = testing changedir = {envtmpdir} allowlist_externals = @@ -27,11 +27,11 @@ commands_pre = commands = pytest {posargs} -[testenv:py{39,310,311,312,313}-bench-core] +[testenv:py{310,311,312,313}-bench-core] extras = benchmarking commands = pytest benchmarking/bench_core.py {posargs} -[testenv:py{39,310,311,312,313}-bench-packages] +[testenv:py{310,311,312,313}-bench-packages] extras = benchmarking,compare commands = pytest benchmarking/bench_packages.py {posargs} From 8eb20ac1cb63e09dcd78e84a0c71dab8446bb73c Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 8 Aug 2025 14:29:08 +0200 Subject: [PATCH 91/97] =?UTF-8?q?=F0=9F=94=A7=20Improve=20spec=20update=20?= =?UTF-8?q?script=20(#361)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/benchmark.yml | 2 +- .github/workflows/tests.yml | 3 + docs/contributing.md | 16 +++++ pyproject.toml | 1 + tests/test_cmark_spec/get_cmark_spec.py | 77 +++++++++++++++++++++++++ tests/test_cmark_spec/spec.sh | 26 --------- 6 files changed, 98 insertions(+), 27 deletions(-) create mode 100644 tests/test_cmark_spec/get_cmark_spec.py delete mode 100755 tests/test_cmark_spec/spec.sh diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 9c96c035..37cb7e24 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -14,7 +14,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: 3.10 + python-version: "3.10" - name: install pandoc uses: r-lib/actions/setup-pandoc@v2 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 8b0b42b9..aebd2a7c 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -45,6 +45,9 @@ jobs: run: | python -m pip install --upgrade pip pip 
install -e .[testing,linkify] + - name: Check spec file is up to date + run: | + python tests/test_cmark_spec/get_cmark_spec.py - name: Run pytest run: | pytest tests/ --cov=markdown_it --cov-report=xml --cov-report=term-missing diff --git a/docs/contributing.md b/docs/contributing.md index eb73ccda..3a6d6aeb 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -61,6 +61,22 @@ For documentation build tests: >> make html-strict ``` +### Updating the commonmark specification + +If you need to update the commonmark specification, you can do so by running: + +```shell +>> cd markdown-it-py +>> python tests/test_cmark_spec/get_cmark_spec.py +``` + +or + +```shell +>> cd markdown-it-py +>> uv run tests/test_cmark_spec/get_cmark_spec.py +``` + ## Contributing a plugin 1. Does it already exist as JavaScript implementation ([see npm](https://www.npmjs.com/search?q=keywords:markdown-it-plugin))? diff --git a/pyproject.toml b/pyproject.toml index 56bd9df2..86353250 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,6 +60,7 @@ testing = [ "pytest", "pytest-cov", "pytest-regressions", + "requests", ] benchmarking = [ "psutil", diff --git a/tests/test_cmark_spec/get_cmark_spec.py b/tests/test_cmark_spec/get_cmark_spec.py new file mode 100644 index 00000000..851cad75 --- /dev/null +++ b/tests/test_cmark_spec/get_cmark_spec.py @@ -0,0 +1,77 @@ +# /// script +# dependencies = [ +# "requests", +# ] +# /// +from pathlib import Path + +default_version = "0.30" +default_output_path = Path(__file__).parent / "commonmark.json" +default_fixture_test_path = ( + Path(__file__).parent.parent / "test_port" / "fixtures" / "commonmark_spec.md" +) + + +def create_argparser(): + import argparse + + parser = argparse.ArgumentParser(description="Download CommonMark spec JSON") + parser.add_argument( + "version", + nargs="?", + default=default_version, + help=f"CommonMark spec version to download (default: {default_version})", + ) + parser.add_argument( + "--output", + "-o", + type=Path, + default=default_output_path, + help=f"Output file path (default: {default_output_path})", + ) + parser.add_argument( + "--test-fixture", + type=Path, + default=default_fixture_test_path, + help=f"Write to test fixture (default: {default_fixture_test_path})", + ) + return parser + + +if __name__ == "__main__": + import requests # type: ignore[import-untyped] + + args = create_argparser().parse_args() + version: str = args.version + output_path: Path = args.output + write_to_test_fixture = True + test_fixture: Path = args.test_fixture + changed = False + url = f"https://spec.commonmark.org/%7Bversion%7D/spec.json" + print(f"Downloading CommonMark spec from {url}") + response = requests.get(url) + response.raise_for_status() + if not output_path.exists() or output_path.read_text() != response.text: + changed = True + with output_path.open("w") as f: + f.write(response.text) + print(f"Updated to {output_path}") + else: + print(f"Spec file {output_path} is up to date, not overwriting") + + if write_to_test_fixture: + data = response.json() + text = "" + for item in data: + text += "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + text += f"src line: {item['start_line'] - 1}\n\n" + text += f".\n{item['markdown']}.\n{item['html']}.\n\n" + if not test_fixture.exists() or test_fixture.read_text() != text: + changed = True + with test_fixture.open("w") as f: + f.write(text) + print(f"Also updated to {test_fixture}") + else: + print(f"Fixture file {test_fixture} is up to date, not overwriting") + + raise SystemExit(0 if not 
changed else 1) diff --git a/tests/test_cmark_spec/spec.sh b/tests/test_cmark_spec/spec.sh deleted file mode 100755 index c8513903..00000000 --- a/tests/test_cmark_spec/spec.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env bash - -set -e - -REPO="https://github.com/commonmark/CommonMark.git" -VERSION="0.30" - -function main { - echo "Cloning from repo: $REPO..." - git clone --quiet $REPO - - echo "Using version $VERSION..." - cd "CommonMark" - git checkout --quiet $VERSION - - echo "Dumping tests file..." - python3 "test/spec_tests.py" --dump-tests > "../commonmark.json" - - echo "Cleaning up..." - cd .. - rm -rf CommonMark - - echo "Done." -} - -main From 4535d77edcb5f51450a266135687eeae8bcc5033 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 8 Aug 2025 18:49:43 +0200 Subject: [PATCH 92/97] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20Comply=20with=20Comm?= =?UTF-8?q?onmark=200.31.2=20(#362)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR ports https://github.com/markdown-it/markdown-it/commit/cd2477863fdcc182cc8739e9bedc7363acb344d8, which in turn complies with https://spec.commonmark.org/0.31.2/changes.html: - Unicode: ```diff A [Unicode punctuation character](@) is - an [ASCII punctuation character] or anything in - he general Unicode categories `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`. + a character in the Unicode `P` (puncuation) or `S` (symbol) general categories. ``` - HTML comments: ```diff - An HTML comment consists of ``, - where *text* does not start with `>` or `->`, does not end with `-`, and does not contain `--`. - (See the [HTML5 spec](http://www.w3.org/TR/html5/syntax.html#comments).) + An [HTML comment](@) consists of ``, ``, or ``, and `-->` + (see the [HTML spec](https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state)). 
``` - HTML blocks: ```diff Start condition: line begins the string < or --- markdown_it/common/html_blocks.py | 3 +- markdown_it/common/html_re.py | 4 +- markdown_it/common/utils.py | 8 +- tests/test_cmark_spec/commonmark.json | 3114 ++++++++--------- tests/test_cmark_spec/get_cmark_spec.py | 91 +- tests/test_cmark_spec/spec.md | 168 +- .../test_cmark_spec/test_spec/test_file.html | 159 +- tests/test_port/fixtures/commonmark_spec.md | 1346 +++---- tests/test_port/fixtures/fatal.md | 2 +- 9 files changed, 2459 insertions(+), 2436 deletions(-) diff --git a/markdown_it/common/html_blocks.py b/markdown_it/common/html_blocks.py index 8b199af3..8a3b0b7d 100644 --- a/markdown_it/common/html_blocks.py +++ b/markdown_it/common/html_blocks.py @@ -2,6 +2,7 @@ http://jgm.github.io/CommonMark/spec.html#html-blocks """ +# see https://spec.commonmark.org/0.31.2/#html-blocks block_names = [ "address", "article", @@ -52,8 +53,8 @@ "option", "p", "param", + "search", "section", - "source", "summary", "table", "tbody", diff --git a/markdown_it/common/html_re.py b/markdown_it/common/html_re.py index dae052e9..ab822c5f 100644 --- a/markdown_it/common/html_re.py +++ b/markdown_it/common/html_re.py @@ -15,9 +15,9 @@ open_tag = "<[A-Za-z][A-Za-z0-9\\-]*" + attribute + "*\\s*\\/?>" close_tag = "<\\/[A-Za-z][A-Za-z0-9\\-]*\\s*>" -comment = "|" +comment = "" processing = "<[?][\\s\\S]*?[?]>" -declaration = "]*>" +declaration = "]*>" cdata = "" HTML_TAG_RE = re.compile( diff --git a/markdown_it/common/utils.py b/markdown_it/common/utils.py index fedae7e1..11bda644 100644 --- a/markdown_it/common/utils.py +++ b/markdown_it/common/utils.py @@ -5,6 +5,7 @@ import re from re import Match from typing import TypeVar +import unicodedata from .entities import entities @@ -192,15 +193,10 @@ def isWhiteSpace(code: int) -> bool: # ////////////////////////////////////////////////////////////////////////////// -UNICODE_PUNCT_RE = re.compile( - 
r"[!-#%-\*,-\/:;\?@\[-\]_\{\}\xA1\xA7\xAB\xB6\xB7\xBB\xBF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u09FD\u0A76\u0AF0\u0C84\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E4E\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]|\uD800[\uDD00-\uDD02\uDF9F\uDFD0]|\uD801\uDD6F|\uD802[\uDC57\uDD1F\uDD3F\uDE50-\uDE58\uDE7F\uDEF0-\uDEF6\uDF39-\uDF3F\uDF99-\uDF9C]|\uD803[\uDF55-\uDF59]|\uD804[\uDC47-\uDC4D\uDCBB\uDCBC\uDCBE-\uDCC1\uDD40-\uDD43\uDD74\uDD75\uDDC5-\uDDC8\uDDCD\uDDDB\uDDDD-\uDDDF\uDE38-\uDE3D\uDEA9]|\uD805[\uDC4B-\uDC4F\uDC5B\uDC5D\uDCC6\uDDC1-\uDDD7\uDE41-\uDE43\uDE60-\uDE6C\uDF3C-\uDF3E]|\uD806[\uDC3B\uDE3F-\uDE46\uDE9A-\uDE9C\uDE9E-\uDEA2]|\uD807[\uDC41-\uDC45\uDC70\uDC71\uDEF7\uDEF8]|\uD809[\uDC70-\uDC74]|\uD81A[\uDE6E\uDE6F\uDEF5\uDF37-\uDF3B\uDF44]|\uD81B[\uDE97-\uDE9A]|\uD82F\uDC9F|\uD836[\uDE87-\uDE8B]|\uD83A[\uDD5E\uDD5F]" -) - -# Currently without astral characters support. def isPunctChar(ch: str) -> bool: """Check if character is a punctuation character.""" - return UNICODE_PUNCT_RE.search(ch) is not None + return unicodedata.category(ch).startswith(("P", "S")) MD_ASCII_PUNCT = { diff --git a/tests/test_cmark_spec/commonmark.json b/tests/test_cmark_spec/commonmark.json index d742f941..1f89e66f 100644 --- a/tests/test_cmark_spec/commonmark.json +++ b/tests/test_cmark_spec/commonmark.json @@ -3,5005 +3,5005 @@ "markdown": "\tfoo\tbaz\t\tbim\n", "html": "

diff --git a/tests/test_cmark_spec/commonmark.json b/tests/test_cmark_spec/commonmark.json
index d742f941..1f89e66f 100644
--- a/tests/test_cmark_spec/commonmark.json
+++ b/tests/test_cmark_spec/commonmark.json
@@ -3,5005 +3,5005 @@
 [regenerated commonmark.json spec fixture: across the examples in this hunk each entry's "start_line"/"end_line" values shift down by 1 to 4 lines to match the updated spec text, and example 20's autolink changes from <http://example.com?find=\*> to <https://example.com?find=\*>; the "markdown", "html", and "section" fields are otherwise unchanged in this span]
      \n
    • foo
    • \n
    \n", "example": 279, - "start_line": 4622, - "end_line": 4629, + "start_line": 4618, + "end_line": 4625, "section": "List items" }, { "markdown": "-\n\n foo\n", "html": "
      \n
    • \n
    \n

    foo

    \n", "example": 280, - "start_line": 4636, - "end_line": 4645, + "start_line": 4632, + "end_line": 4641, "section": "List items" }, { "markdown": "- foo\n-\n- bar\n", "html": "
      \n
    • foo
    • \n
    • \n
    • bar
    • \n
    \n", "example": 281, - "start_line": 4650, - "end_line": 4660, + "start_line": 4646, + "end_line": 4656, "section": "List items" }, { "markdown": "- foo\n- \n- bar\n", "html": "
      \n
    • foo
    • \n
    • \n
    • bar
    • \n
    \n", "example": 282, - "start_line": 4665, - "end_line": 4675, + "start_line": 4661, + "end_line": 4671, "section": "List items" }, { "markdown": "1. foo\n2.\n3. bar\n", "html": "
      \n
    1. foo
    2. \n
    3. \n
    4. bar
    5. \n
    \n", "example": 283, - "start_line": 4680, - "end_line": 4690, + "start_line": 4676, + "end_line": 4686, "section": "List items" }, { "markdown": "*\n", "html": "
      \n
    • \n
    \n", "example": 284, - "start_line": 4695, - "end_line": 4701, + "start_line": 4691, + "end_line": 4697, "section": "List items" }, { "markdown": "foo\n*\n\nfoo\n1.\n", "html": "

    foo\n*

    \n

    foo\n1.

    \n", "example": 285, - "start_line": 4705, - "end_line": 4716, + "start_line": 4701, + "end_line": 4712, "section": "List items" }, { "markdown": " 1. A paragraph\n with two lines.\n\n indented code\n\n > A block quote.\n", "html": "
      \n
    1. \n

      A paragraph\nwith two lines.

      \n
      indented code\n
      \n
      \n

      A block quote.

      \n
      \n
    2. \n
    \n", "example": 286, - "start_line": 4727, - "end_line": 4746, + "start_line": 4723, + "end_line": 4742, "section": "List items" }, { "markdown": " 1. A paragraph\n with two lines.\n\n indented code\n\n > A block quote.\n", "html": "
      \n
    1. \n

      A paragraph\nwith two lines.

      \n
      indented code\n
      \n
      \n

      A block quote.

      \n
      \n
    2. \n
    \n", "example": 287, - "start_line": 4751, - "end_line": 4770, + "start_line": 4747, + "end_line": 4766, "section": "List items" }, { "markdown": " 1. A paragraph\n with two lines.\n\n indented code\n\n > A block quote.\n", "html": "
      \n
    1. \n

      A paragraph\nwith two lines.

      \n
      indented code\n
      \n
      \n

      A block quote.

      \n
      \n
    2. \n
    \n", "example": 288, - "start_line": 4775, - "end_line": 4794, + "start_line": 4771, + "end_line": 4790, "section": "List items" }, { "markdown": " 1. A paragraph\n with two lines.\n\n indented code\n\n > A block quote.\n", "html": "
    1.  A paragraph\n    with two lines.\n\n        indented code\n\n    > A block quote.\n
    \n", "example": 289, - "start_line": 4799, - "end_line": 4814, + "start_line": 4795, + "end_line": 4810, "section": "List items" }, { "markdown": " 1. A paragraph\nwith two lines.\n\n indented code\n\n > A block quote.\n", "html": "
      \n
    1. \n

      A paragraph\nwith two lines.

      \n
      indented code\n
      \n
      \n

      A block quote.

      \n
      \n
    2. \n
    \n", "example": 290, - "start_line": 4829, - "end_line": 4848, + "start_line": 4825, + "end_line": 4844, "section": "List items" }, { "markdown": " 1. A paragraph\n with two lines.\n", "html": "
      \n
    1. A paragraph\nwith two lines.
    2. \n
    \n", "example": 291, - "start_line": 4853, - "end_line": 4861, + "start_line": 4849, + "end_line": 4857, "section": "List items" }, { "markdown": "> 1. > Blockquote\ncontinued here.\n", "html": "
    \n
      \n
    1. \n
      \n

      Blockquote\ncontinued here.

      \n
      \n
    2. \n
    \n
    \n", "example": 292, - "start_line": 4866, - "end_line": 4880, + "start_line": 4862, + "end_line": 4876, "section": "List items" }, { "markdown": "> 1. > Blockquote\n> continued here.\n", "html": "
    \n
      \n
    1. \n
      \n

      Blockquote\ncontinued here.

      \n
      \n
    2. \n
    \n
    \n", "example": 293, - "start_line": 4883, - "end_line": 4897, + "start_line": 4879, + "end_line": 4893, "section": "List items" }, { "markdown": "- foo\n - bar\n - baz\n - boo\n", "html": "
      \n
    • foo\n
        \n
      • bar\n
          \n
        • baz\n
            \n
          • boo
          • \n
          \n
        • \n
        \n
      • \n
      \n
    • \n
    \n", "example": 294, - "start_line": 4911, - "end_line": 4932, + "start_line": 4907, + "end_line": 4928, "section": "List items" }, { "markdown": "- foo\n - bar\n - baz\n - boo\n", "html": "
      \n
    • foo
    • \n
    • bar
    • \n
    • baz
    • \n
    • boo
    • \n
    \n", "example": 295, - "start_line": 4937, - "end_line": 4949, + "start_line": 4933, + "end_line": 4945, "section": "List items" }, { "markdown": "10) foo\n - bar\n", "html": "
      \n
    1. foo\n
        \n
      • bar
      • \n
      \n
    2. \n
    \n", "example": 296, - "start_line": 4954, - "end_line": 4965, + "start_line": 4950, + "end_line": 4961, "section": "List items" }, { "markdown": "10) foo\n - bar\n", "html": "
      \n
    1. foo
    2. \n
    \n
      \n
    • bar
    • \n
    \n", "example": 297, - "start_line": 4970, - "end_line": 4980, + "start_line": 4966, + "end_line": 4976, "section": "List items" }, { "markdown": "- - foo\n", "html": "
      \n
    • \n
        \n
      • foo
      • \n
      \n
    • \n
    \n", "example": 298, - "start_line": 4985, - "end_line": 4995, + "start_line": 4981, + "end_line": 4991, "section": "List items" }, { "markdown": "1. - 2. foo\n", "html": "
      \n
    1. \n
        \n
      • \n
          \n
        1. foo
        2. \n
        \n
      • \n
      \n
    2. \n
    \n", "example": 299, - "start_line": 4998, - "end_line": 5012, + "start_line": 4994, + "end_line": 5008, "section": "List items" }, { "markdown": "- # Foo\n- Bar\n ---\n baz\n", "html": "
      \n
    • \n

      Foo

      \n
    • \n
    • \n

      Bar

      \nbaz
    • \n
    \n", "example": 300, - "start_line": 5017, - "end_line": 5031, + "start_line": 5013, + "end_line": 5027, "section": "List items" }, { "markdown": "- foo\n- bar\n+ baz\n", "html": "
      \n
    • foo
    • \n
    • bar
    • \n
    \n
      \n
    • baz
    • \n
    \n", "example": 301, - "start_line": 5253, - "end_line": 5265, + "start_line": 5249, + "end_line": 5261, "section": "Lists" }, { "markdown": "1. foo\n2. bar\n3) baz\n", "html": "
      \n
    1. foo
    2. \n
    3. bar
    4. \n
    \n
      \n
    1. baz
    2. \n
    \n", "example": 302, - "start_line": 5268, - "end_line": 5280, + "start_line": 5264, + "end_line": 5276, "section": "Lists" }, { "markdown": "Foo\n- bar\n- baz\n", "html": "

    Foo

    \n
      \n
    • bar
    • \n
    • baz
    • \n
    \n", "example": 303, - "start_line": 5287, - "end_line": 5297, + "start_line": 5283, + "end_line": 5293, "section": "Lists" }, { "markdown": "The number of windows in my house is\n14. The number of doors is 6.\n", "html": "

    The number of windows in my house is\n14. The number of doors is 6.

    \n", "example": 304, - "start_line": 5364, - "end_line": 5370, + "start_line": 5360, + "end_line": 5366, "section": "Lists" }, { "markdown": "The number of windows in my house is\n1. The number of doors is 6.\n", "html": "

    The number of windows in my house is

    \n
      \n
    1. The number of doors is 6.
    2. \n
    \n", "example": 305, - "start_line": 5374, - "end_line": 5382, + "start_line": 5370, + "end_line": 5378, "section": "Lists" }, { "markdown": "- foo\n\n- bar\n\n\n- baz\n", "html": "
      \n
    • \n

      foo

      \n
    • \n
    • \n

      bar

      \n
    • \n
    • \n

      baz

      \n
    • \n
    \n", "example": 306, - "start_line": 5388, - "end_line": 5407, + "start_line": 5384, + "end_line": 5403, "section": "Lists" }, { "markdown": "- foo\n - bar\n - baz\n\n\n bim\n", "html": "
      \n
    • foo\n
        \n
      • bar\n
          \n
        • \n

          baz

          \n

          bim

          \n
        • \n
        \n
      • \n
      \n
    • \n
    \n", "example": 307, - "start_line": 5409, - "end_line": 5431, + "start_line": 5405, + "end_line": 5427, "section": "Lists" }, { "markdown": "- foo\n- bar\n\n\n\n- baz\n- bim\n", "html": "
      \n
    • foo
    • \n
    • bar
    • \n
    \n\n
      \n
    • baz
    • \n
    • bim
    • \n
    \n", "example": 308, - "start_line": 5439, - "end_line": 5457, + "start_line": 5435, + "end_line": 5453, "section": "Lists" }, { "markdown": "- foo\n\n notcode\n\n- foo\n\n\n\n code\n", "html": "
      \n
    • \n

      foo

      \n

      notcode

      \n
    • \n
    • \n

      foo

      \n
    • \n
    \n\n
    code\n
    \n", "example": 309, - "start_line": 5460, - "end_line": 5483, + "start_line": 5456, + "end_line": 5479, "section": "Lists" }, { "markdown": "- a\n - b\n - c\n - d\n - e\n - f\n- g\n", "html": "
      \n
    • a
    • \n
    • b
    • \n
    • c
    • \n
    • d
    • \n
    • e
    • \n
    • f
    • \n
    • g
    • \n
    \n", "example": 310, - "start_line": 5491, - "end_line": 5509, + "start_line": 5487, + "end_line": 5505, "section": "Lists" }, { "markdown": "1. a\n\n 2. b\n\n 3. c\n", "html": "
      \n
    1. \n

      a

      \n
    2. \n
    3. \n

      b

      \n
    4. \n
    5. \n

      c

      \n
    6. \n
    \n", "example": 311, - "start_line": 5512, - "end_line": 5530, + "start_line": 5508, + "end_line": 5526, "section": "Lists" }, { "markdown": "- a\n - b\n - c\n - d\n - e\n", "html": "
      \n
    • a
    • \n
    • b
    • \n
    • c
    • \n
    • d\n- e
    • \n
    \n", "example": 312, - "start_line": 5536, - "end_line": 5550, + "start_line": 5532, + "end_line": 5546, "section": "Lists" }, { "markdown": "1. a\n\n 2. b\n\n 3. c\n", "html": "
      \n
    1. \n

      a

      \n
    2. \n
    3. \n

      b

      \n
    4. \n
    \n
    3. c\n
    \n", "example": 313, - "start_line": 5556, - "end_line": 5573, + "start_line": 5552, + "end_line": 5569, "section": "Lists" }, { "markdown": "- a\n- b\n\n- c\n", "html": "
      \n
    • \n

      a

      \n
    • \n
    • \n

      b

      \n
    • \n
    • \n

      c

      \n
    • \n
    \n", "example": 314, - "start_line": 5579, - "end_line": 5596, + "start_line": 5575, + "end_line": 5592, "section": "Lists" }, { "markdown": "* a\n*\n\n* c\n", "html": "
      \n
    • \n

      a

      \n
    • \n
    • \n
    • \n

      c

      \n
    • \n
    \n", "example": 315, - "start_line": 5601, - "end_line": 5616, + "start_line": 5597, + "end_line": 5612, "section": "Lists" }, { "markdown": "- a\n- b\n\n c\n- d\n", "html": "
      \n
    • \n

      a

      \n
    • \n
    • \n

      b

      \n

      c

      \n
    • \n
    • \n

      d

      \n
    • \n
    \n", "example": 316, - "start_line": 5623, - "end_line": 5642, + "start_line": 5619, + "end_line": 5638, "section": "Lists" }, { "markdown": "- a\n- b\n\n [ref]: /url\n- d\n", "html": "
      \n
    • \n

      a

      \n
    • \n
    • \n

      b

      \n
    • \n
    • \n

      d

      \n
    • \n
    \n", "example": 317, - "start_line": 5645, - "end_line": 5663, + "start_line": 5641, + "end_line": 5659, "section": "Lists" }, { "markdown": "- a\n- ```\n b\n\n\n ```\n- c\n", "html": "
      \n
    • a
    • \n
    • \n
      b\n\n\n
      \n
    • \n
    • c
    • \n
    \n", "example": 318, - "start_line": 5668, - "end_line": 5687, + "start_line": 5664, + "end_line": 5683, "section": "Lists" }, { "markdown": "- a\n - b\n\n c\n- d\n", "html": "
      \n
    • a\n
        \n
      • \n

        b

        \n

        c

        \n
      • \n
      \n
    • \n
    • d
    • \n
    \n", "example": 319, - "start_line": 5694, - "end_line": 5712, + "start_line": 5690, + "end_line": 5708, "section": "Lists" }, { "markdown": "* a\n > b\n >\n* c\n", "html": "
      \n
    • a\n
      \n

      b

      \n
      \n
    • \n
    • c
    • \n
    \n", "example": 320, - "start_line": 5718, - "end_line": 5732, + "start_line": 5714, + "end_line": 5728, "section": "Lists" }, { "markdown": "- a\n > b\n ```\n c\n ```\n- d\n", "html": "
      \n
    • a\n
      \n

      b

      \n
      \n
      c\n
      \n
    • \n
    • d
    • \n
    \n", "example": 321, - "start_line": 5738, - "end_line": 5756, + "start_line": 5734, + "end_line": 5752, "section": "Lists" }, { "markdown": "- a\n", "html": "
      \n
    • a
    • \n
    \n", "example": 322, - "start_line": 5761, - "end_line": 5767, + "start_line": 5757, + "end_line": 5763, "section": "Lists" }, { "markdown": "- a\n - b\n", "html": "
      \n
    • a\n
        \n
      • b
      • \n
      \n
    • \n
    \n", "example": 323, - "start_line": 5770, - "end_line": 5781, + "start_line": 5766, + "end_line": 5777, "section": "Lists" }, { "markdown": "1. ```\n foo\n ```\n\n bar\n", "html": "
      \n
    1. \n
      foo\n
      \n

      bar

      \n
    2. \n
    \n", "example": 324, - "start_line": 5787, - "end_line": 5801, + "start_line": 5783, + "end_line": 5797, "section": "Lists" }, { "markdown": "* foo\n * bar\n\n baz\n", "html": "
      \n
    • \n

      foo

      \n
        \n
      • bar
      • \n
      \n

      baz

      \n
    • \n
    \n", "example": 325, - "start_line": 5806, - "end_line": 5821, + "start_line": 5802, + "end_line": 5817, "section": "Lists" }, { "markdown": "- a\n - b\n - c\n\n- d\n - e\n - f\n", "html": "
      \n
    • \n

      a

      \n
        \n
      • b
      • \n
      • c
      • \n
      \n
    • \n
    • \n

      d

      \n
        \n
      • e
      • \n
      • f
      • \n
      \n
    • \n
    \n", "example": 326, - "start_line": 5824, - "end_line": 5849, + "start_line": 5820, + "end_line": 5845, "section": "Lists" }, { "markdown": "`hi`lo`\n", "html": "

    hilo`

    \n", "example": 327, - "start_line": 5858, - "end_line": 5862, + "start_line": 5854, + "end_line": 5858, "section": "Inlines" }, { "markdown": "`foo`\n", "html": "

    foo

    \n", "example": 328, - "start_line": 5890, - "end_line": 5894, + "start_line": 5886, + "end_line": 5890, "section": "Code spans" }, { "markdown": "`` foo ` bar ``\n", "html": "

    foo ` bar

    \n", "example": 329, - "start_line": 5901, - "end_line": 5905, + "start_line": 5897, + "end_line": 5901, "section": "Code spans" }, { "markdown": "` `` `\n", "html": "

    ``

    \n", "example": 330, - "start_line": 5911, - "end_line": 5915, + "start_line": 5907, + "end_line": 5911, "section": "Code spans" }, { "markdown": "` `` `\n", "html": "

    ``

    \n", "example": 331, - "start_line": 5919, - "end_line": 5923, + "start_line": 5915, + "end_line": 5919, "section": "Code spans" }, { "markdown": "` a`\n", "html": "

    a

    \n", "example": 332, - "start_line": 5928, - "end_line": 5932, + "start_line": 5924, + "end_line": 5928, "section": "Code spans" }, { "markdown": "` b `\n", "html": "

     b 

    \n", "example": 333, - "start_line": 5937, - "end_line": 5941, + "start_line": 5933, + "end_line": 5937, "section": "Code spans" }, { "markdown": "` `\n` `\n", "html": "

     \n

    \n", "example": 334, - "start_line": 5945, - "end_line": 5951, + "start_line": 5941, + "end_line": 5947, "section": "Code spans" }, { "markdown": "``\nfoo\nbar \nbaz\n``\n", "html": "

    foo bar baz

    \n", "example": 335, - "start_line": 5956, - "end_line": 5964, + "start_line": 5952, + "end_line": 5960, "section": "Code spans" }, { "markdown": "``\nfoo \n``\n", "html": "

    foo

    \n", "example": 336, - "start_line": 5966, - "end_line": 5972, + "start_line": 5962, + "end_line": 5968, "section": "Code spans" }, { "markdown": "`foo bar \nbaz`\n", "html": "

    foo bar baz

    \n", "example": 337, - "start_line": 5977, - "end_line": 5982, + "start_line": 5973, + "end_line": 5978, "section": "Code spans" }, { "markdown": "`foo\\`bar`\n", "html": "

    foo\\bar`

    \n", "example": 338, - "start_line": 5994, - "end_line": 5998, + "start_line": 5990, + "end_line": 5994, "section": "Code spans" }, { "markdown": "``foo`bar``\n", "html": "

    foo`bar

    \n", "example": 339, - "start_line": 6005, - "end_line": 6009, + "start_line": 6001, + "end_line": 6005, "section": "Code spans" }, { "markdown": "` foo `` bar `\n", "html": "

    foo `` bar

    \n", "example": 340, - "start_line": 6011, - "end_line": 6015, + "start_line": 6007, + "end_line": 6011, "section": "Code spans" }, { "markdown": "*foo`*`\n", "html": "

    *foo*

    \n", "example": 341, - "start_line": 6023, - "end_line": 6027, + "start_line": 6019, + "end_line": 6023, "section": "Code spans" }, { "markdown": "[not a `link](/foo`)\n", "html": "

    [not a link](/foo)

    \n", "example": 342, - "start_line": 6032, - "end_line": 6036, + "start_line": 6028, + "end_line": 6032, "section": "Code spans" }, { "markdown": "``\n", "html": "

    <a href="">`

    \n", "example": 343, - "start_line": 6042, - "end_line": 6046, + "start_line": 6038, + "end_line": 6042, "section": "Code spans" }, { "markdown": "
    `\n", "html": "

    `

    \n", "example": 344, - "start_line": 6051, - "end_line": 6055, + "start_line": 6047, + "end_line": 6051, "section": "Code spans" }, { - "markdown": "``\n", - "html": "

    <http://foo.bar.baz>`

    \n", + "markdown": "``\n", + "html": "

    <https://foo.bar.baz>`

    \n", "example": 345, - "start_line": 6060, - "end_line": 6064, + "start_line": 6056, + "end_line": 6060, "section": "Code spans" }, { - "markdown": "`\n", - "html": "

    http://foo.bar.`baz`

    \n", + "markdown": "`\n", + "html": "

    https://foo.bar.`baz`

    \n", "example": 346, - "start_line": 6069, - "end_line": 6073, + "start_line": 6065, + "end_line": 6069, "section": "Code spans" }, { "markdown": "```foo``\n", "html": "

    ```foo``

    \n", "example": 347, - "start_line": 6079, - "end_line": 6083, + "start_line": 6075, + "end_line": 6079, "section": "Code spans" }, { "markdown": "`foo\n", "html": "

    `foo

    \n", "example": 348, - "start_line": 6086, - "end_line": 6090, + "start_line": 6082, + "end_line": 6086, "section": "Code spans" }, { "markdown": "`foo``bar``\n", "html": "

    `foobar

    \n", "example": 349, - "start_line": 6095, - "end_line": 6099, + "start_line": 6091, + "end_line": 6095, "section": "Code spans" }, { "markdown": "*foo bar*\n", "html": "

    foo bar

    \n", "example": 350, - "start_line": 6312, - "end_line": 6316, + "start_line": 6308, + "end_line": 6312, "section": "Emphasis and strong emphasis" }, { "markdown": "a * foo bar*\n", "html": "

    a * foo bar*

    \n", "example": 351, - "start_line": 6322, - "end_line": 6326, + "start_line": 6318, + "end_line": 6322, "section": "Emphasis and strong emphasis" }, { "markdown": "a*\"foo\"*\n", "html": "

    a*"foo"*

    \n", "example": 352, - "start_line": 6333, - "end_line": 6337, + "start_line": 6329, + "end_line": 6333, "section": "Emphasis and strong emphasis" }, { "markdown": "* a *\n", "html": "

    * a *

    \n", "example": 353, - "start_line": 6342, - "end_line": 6346, + "start_line": 6338, + "end_line": 6342, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*$*alpha.\n\n*£*bravo.\n\n*€*charlie.\n", + "html": "

    *$*alpha.

    \n

    *£*bravo.

    \n

    *€*charlie.

    \n", + "example": 354, + "start_line": 6347, + "end_line": 6357, "section": "Emphasis and strong emphasis" }, { "markdown": "foo*bar*\n", "html": "

    foobar

    \n", - "example": 354, - "start_line": 6351, - "end_line": 6355, + "example": 355, + "start_line": 6362, + "end_line": 6366, "section": "Emphasis and strong emphasis" }, { "markdown": "5*6*78\n", "html": "

    5678

    \n", - "example": 355, - "start_line": 6358, - "end_line": 6362, + "example": 356, + "start_line": 6369, + "end_line": 6373, "section": "Emphasis and strong emphasis" }, { "markdown": "_foo bar_\n", "html": "

    foo bar

    \n", - "example": 356, - "start_line": 6367, - "end_line": 6371, + "example": 357, + "start_line": 6378, + "end_line": 6382, "section": "Emphasis and strong emphasis" }, { "markdown": "_ foo bar_\n", "html": "

    _ foo bar_

    \n", - "example": 357, - "start_line": 6377, - "end_line": 6381, + "example": 358, + "start_line": 6388, + "end_line": 6392, "section": "Emphasis and strong emphasis" }, { "markdown": "a_\"foo\"_\n", "html": "

    a_"foo"_

    \n", - "example": 358, - "start_line": 6387, - "end_line": 6391, + "example": 359, + "start_line": 6398, + "end_line": 6402, "section": "Emphasis and strong emphasis" }, { "markdown": "foo_bar_\n", "html": "

    foo_bar_

    \n", - "example": 359, - "start_line": 6396, - "end_line": 6400, + "example": 360, + "start_line": 6407, + "end_line": 6411, "section": "Emphasis and strong emphasis" }, { "markdown": "5_6_78\n", "html": "

    5_6_78

    \n", - "example": 360, - "start_line": 6403, - "end_line": 6407, + "example": 361, + "start_line": 6414, + "end_line": 6418, "section": "Emphasis and strong emphasis" }, { "markdown": "пристаням_стремятся_\n", "html": "

    пристаням_стремятся_

    \n", - "example": 361, - "start_line": 6410, - "end_line": 6414, + "example": 362, + "start_line": 6421, + "end_line": 6425, "section": "Emphasis and strong emphasis" }, { "markdown": "aa_\"bb\"_cc\n", "html": "

    aa_"bb"_cc

    \n", - "example": 362, - "start_line": 6420, - "end_line": 6424, + "example": 363, + "start_line": 6431, + "end_line": 6435, "section": "Emphasis and strong emphasis" }, { "markdown": "foo-_(bar)_\n", "html": "

    foo-(bar)

    \n", - "example": 363, - "start_line": 6431, - "end_line": 6435, + "example": 364, + "start_line": 6442, + "end_line": 6446, "section": "Emphasis and strong emphasis" }, { "markdown": "_foo*\n", "html": "

    _foo*

    \n", - "example": 364, - "start_line": 6443, - "end_line": 6447, + "example": 365, + "start_line": 6454, + "end_line": 6458, "section": "Emphasis and strong emphasis" }, { "markdown": "*foo bar *\n", "html": "

    *foo bar *

    \n", - "example": 365, - "start_line": 6453, - "end_line": 6457, + "example": 366, + "start_line": 6464, + "end_line": 6468, "section": "Emphasis and strong emphasis" }, { "markdown": "*foo bar\n*\n", "html": "

    *foo bar\n*

    \n", - "example": 366, - "start_line": 6462, - "end_line": 6468, + "example": 367, + "start_line": 6473, + "end_line": 6479, "section": "Emphasis and strong emphasis" }, { "markdown": "*(*foo)\n", "html": "

    *(*foo)

    \n", - "example": 367, - "start_line": 6475, - "end_line": 6479, + "example": 368, + "start_line": 6486, + "end_line": 6490, "section": "Emphasis and strong emphasis" }, { "markdown": "*(*foo*)*\n", "html": "

    (foo)

    \n", - "example": 368, - "start_line": 6485, - "end_line": 6489, + "example": 369, + "start_line": 6496, + "end_line": 6500, "section": "Emphasis and strong emphasis" }, { "markdown": "*foo*bar\n", "html": "

    foobar

    \n", - "example": 369, - "start_line": 6494, - "end_line": 6498, + "example": 370, + "start_line": 6505, + "end_line": 6509, "section": "Emphasis and strong emphasis" }, { "markdown": "_foo bar _\n", "html": "

    _foo bar _

    \n", - "example": 370, - "start_line": 6507, - "end_line": 6511, + "example": 371, + "start_line": 6518, + "end_line": 6522, "section": "Emphasis and strong emphasis" }, { "markdown": "_(_foo)\n", "html": "

    _(_foo)

    \n", - "example": 371, - "start_line": 6517, - "end_line": 6521, + "example": 372, + "start_line": 6528, + "end_line": 6532, "section": "Emphasis and strong emphasis" }, { "markdown": "_(_foo_)_\n", "html": "

    (foo)

    \n", - "example": 372, - "start_line": 6526, - "end_line": 6530, + "example": 373, + "start_line": 6537, + "end_line": 6541, "section": "Emphasis and strong emphasis" }, { "markdown": "_foo_bar\n", "html": "

    _foo_bar

    \n", - "example": 373, - "start_line": 6535, - "end_line": 6539, + "example": 374, + "start_line": 6546, + "end_line": 6550, "section": "Emphasis and strong emphasis" }, { "markdown": "_пристаням_стремятся\n", "html": "

    _пристаням_стремятся

    \n", - "example": 374, - "start_line": 6542, - "end_line": 6546, + "example": 375, + "start_line": 6553, + "end_line": 6557, "section": "Emphasis and strong emphasis" }, { "markdown": "_foo_bar_baz_\n", "html": "

    foo_bar_baz

    \n", - "example": 375, - "start_line": 6549, - "end_line": 6553, + "example": 376, + "start_line": 6560, + "end_line": 6564, "section": "Emphasis and strong emphasis" }, { "markdown": "_(bar)_.\n", "html": "

    (bar).

    \n", - "example": 376, - "start_line": 6560, - "end_line": 6564, + "example": 377, + "start_line": 6571, + "end_line": 6575, "section": "Emphasis and strong emphasis" }, { "markdown": "**foo bar**\n", "html": "

    foo bar

    \n", - "example": 377, - "start_line": 6569, - "end_line": 6573, + "example": 378, + "start_line": 6580, + "end_line": 6584, "section": "Emphasis and strong emphasis" }, { "markdown": "** foo bar**\n", "html": "

    ** foo bar**

    \n", - "example": 378, - "start_line": 6579, - "end_line": 6583, + "example": 379, + "start_line": 6590, + "end_line": 6594, "section": "Emphasis and strong emphasis" }, { "markdown": "a**\"foo\"**\n", "html": "

    a**"foo"**

    \n", - "example": 379, - "start_line": 6590, - "end_line": 6594, + "example": 380, + "start_line": 6601, + "end_line": 6605, "section": "Emphasis and strong emphasis" }, { "markdown": "foo**bar**\n", "html": "

    foobar

    \n", - "example": 380, - "start_line": 6599, - "end_line": 6603, + "example": 381, + "start_line": 6610, + "end_line": 6614, "section": "Emphasis and strong emphasis" }, { "markdown": "__foo bar__\n", "html": "

    foo bar

    \n", - "example": 381, - "start_line": 6608, - "end_line": 6612, + "example": 382, + "start_line": 6619, + "end_line": 6623, "section": "Emphasis and strong emphasis" }, { "markdown": "__ foo bar__\n", "html": "

    __ foo bar__

    \n", - "example": 382, - "start_line": 6618, - "end_line": 6622, + "example": 383, + "start_line": 6629, + "end_line": 6633, "section": "Emphasis and strong emphasis" }, { "markdown": "__\nfoo bar__\n", "html": "

    __\nfoo bar__

    \n", - "example": 383, - "start_line": 6626, - "end_line": 6632, + "example": 384, + "start_line": 6637, + "end_line": 6643, "section": "Emphasis and strong emphasis" }, { "markdown": "a__\"foo\"__\n", "html": "

    a__"foo"__

    \n", - "example": 384, - "start_line": 6638, - "end_line": 6642, + "example": 385, + "start_line": 6649, + "end_line": 6653, "section": "Emphasis and strong emphasis" }, { "markdown": "foo__bar__\n", "html": "

    foo__bar__

    \n", - "example": 385, - "start_line": 6647, - "end_line": 6651, + "example": 386, + "start_line": 6658, + "end_line": 6662, "section": "Emphasis and strong emphasis" }, { "markdown": "5__6__78\n", "html": "

    5__6__78

    \n", - "example": 386, - "start_line": 6654, - "end_line": 6658, + "example": 387, + "start_line": 6665, + "end_line": 6669, "section": "Emphasis and strong emphasis" }, { "markdown": "пристаням__стремятся__\n", "html": "

    пристаням__стремятся__

    \n", - "example": 387, - "start_line": 6661, - "end_line": 6665, + "example": 388, + "start_line": 6672, + "end_line": 6676, "section": "Emphasis and strong emphasis" }, { "markdown": "__foo, __bar__, baz__\n", "html": "

    foo, bar, baz

    \n", - "example": 388, - "start_line": 6668, - "end_line": 6672, + "example": 389, + "start_line": 6679, + "end_line": 6683, "section": "Emphasis and strong emphasis" }, { "markdown": "foo-__(bar)__\n", "html": "

    foo-(bar)

    \n", - "example": 389, - "start_line": 6679, - "end_line": 6683, + "example": 390, + "start_line": 6690, + "end_line": 6694, "section": "Emphasis and strong emphasis" }, { "markdown": "**foo bar **\n", "html": "

    **foo bar **

    \n", - "example": 390, - "start_line": 6692, - "end_line": 6696, + "example": 391, + "start_line": 6703, + "end_line": 6707, "section": "Emphasis and strong emphasis" }, { "markdown": "**(**foo)\n", "html": "

    **(**foo)

    \n", - "example": 391, - "start_line": 6705, - "end_line": 6709, + "example": 392, + "start_line": 6716, + "end_line": 6720, "section": "Emphasis and strong emphasis" }, { "markdown": "*(**foo**)*\n", "html": "

    (foo)

    \n", - "example": 392, - "start_line": 6715, - "end_line": 6719, + "example": 393, + "start_line": 6726, + "end_line": 6730, "section": "Emphasis and strong emphasis" }, { "markdown": "**Gomphocarpus (*Gomphocarpus physocarpus*, syn.\n*Asclepias physocarpa*)**\n", "html": "

    Gomphocarpus (Gomphocarpus physocarpus, syn.\nAsclepias physocarpa)

    \n", - "example": 393, - "start_line": 6722, - "end_line": 6728, + "example": 394, + "start_line": 6733, + "end_line": 6739, "section": "Emphasis and strong emphasis" }, { "markdown": "**foo \"*bar*\" foo**\n", "html": "

    foo "bar" foo

    \n", - "example": 394, - "start_line": 6731, - "end_line": 6735, + "example": 395, + "start_line": 6742, + "end_line": 6746, "section": "Emphasis and strong emphasis" }, { "markdown": "**foo**bar\n", "html": "

    foobar

    \n", - "example": 395, - "start_line": 6740, - "end_line": 6744, + "example": 396, + "start_line": 6751, + "end_line": 6755, "section": "Emphasis and strong emphasis" }, { "markdown": "__foo bar __\n", "html": "

    __foo bar __

    \n", - "example": 396, - "start_line": 6752, - "end_line": 6756, + "example": 397, + "start_line": 6763, + "end_line": 6767, "section": "Emphasis and strong emphasis" }, { "markdown": "__(__foo)\n", "html": "

    __(__foo)

    \n", - "example": 397, - "start_line": 6762, - "end_line": 6766, + "example": 398, + "start_line": 6773, + "end_line": 6777, "section": "Emphasis and strong emphasis" }, { "markdown": "_(__foo__)_\n", "html": "

    (foo)

    \n", - "example": 398, - "start_line": 6772, - "end_line": 6776, + "example": 399, + "start_line": 6783, + "end_line": 6787, "section": "Emphasis and strong emphasis" }, { "markdown": "__foo__bar\n", "html": "

    __foo__bar

    \n", - "example": 399, - "start_line": 6781, - "end_line": 6785, + "example": 400, + "start_line": 6792, + "end_line": 6796, "section": "Emphasis and strong emphasis" }, { "markdown": "__пристаням__стремятся\n", "html": "

    __пристаням__стремятся

    \n", - "example": 400, - "start_line": 6788, - "end_line": 6792, + "example": 401, + "start_line": 6799, + "end_line": 6803, "section": "Emphasis and strong emphasis" }, { "markdown": "__foo__bar__baz__\n", "html": "

    foo__bar__baz

    \n", - "example": 401, - "start_line": 6795, - "end_line": 6799, + "example": 402, + "start_line": 6806, + "end_line": 6810, "section": "Emphasis and strong emphasis" }, { "markdown": "__(bar)__.\n", "html": "

    (bar).

    \n", - "example": 402, - "start_line": 6806, - "end_line": 6810, + "example": 403, + "start_line": 6817, + "end_line": 6821, "section": "Emphasis and strong emphasis" }, { "markdown": "*foo [bar](/url)*\n", "html": "

    foo bar

    \n", - "example": 403, - "start_line": 6818, - "end_line": 6822, + "example": 404, + "start_line": 6829, + "end_line": 6833, "section": "Emphasis and strong emphasis" }, { "markdown": "*foo\nbar*\n", "html": "

    foo\nbar

    \n", - "example": 404, - "start_line": 6825, - "end_line": 6831, + "example": 405, + "start_line": 6836, + "end_line": 6842, "section": "Emphasis and strong emphasis" }, { "markdown": "_foo __bar__ baz_\n", "html": "

    foo bar baz

    \n", - "example": 405, - "start_line": 6837, - "end_line": 6841, + "example": 406, + "start_line": 6848, + "end_line": 6852, "section": "Emphasis and strong emphasis" }, { "markdown": "_foo _bar_ baz_\n", "html": "

    foo bar baz

    \n", - "example": 406, - "start_line": 6844, - "end_line": 6848, + "example": 407, + "start_line": 6855, + "end_line": 6859, "section": "Emphasis and strong emphasis" }, { "markdown": "__foo_ bar_\n", "html": "

    foo bar

    \n", - "example": 407, - "start_line": 6851, - "end_line": 6855, + "example": 408, + "start_line": 6862, + "end_line": 6866, "section": "Emphasis and strong emphasis" }, { "markdown": "*foo *bar**\n", "html": "

    foo bar

    \n", - "example": 408, - "start_line": 6858, - "end_line": 6862, + "example": 409, + "start_line": 6869, + "end_line": 6873, "section": "Emphasis and strong emphasis" }, { "markdown": "*foo **bar** baz*\n", "html": "

    foo bar baz

    \n", - "example": 409, - "start_line": 6865, - "end_line": 6869, + "example": 410, + "start_line": 6876, + "end_line": 6880, "section": "Emphasis and strong emphasis" }, { "markdown": "*foo**bar**baz*\n", "html": "

    foobarbaz

    \n", - "example": 410, - "start_line": 6871, - "end_line": 6875, + "example": 411, + "start_line": 6882, + "end_line": 6886, "section": "Emphasis and strong emphasis" }, { "markdown": "*foo**bar*\n", "html": "

    foo**bar

    \n", - "example": 411, - "start_line": 6895, - "end_line": 6899, + "example": 412, + "start_line": 6906, + "end_line": 6910, "section": "Emphasis and strong emphasis" }, { "markdown": "***foo** bar*\n", "html": "

    foo bar

    \n", - "example": 412, - "start_line": 6908, - "end_line": 6912, + "example": 413, + "start_line": 6919, + "end_line": 6923, "section": "Emphasis and strong emphasis" }, { "markdown": "*foo **bar***\n", "html": "

    foo bar

    \n", - "example": 413, - "start_line": 6915, - "end_line": 6919, + "example": 414, + "start_line": 6926, + "end_line": 6930, "section": "Emphasis and strong emphasis" }, { "markdown": "*foo**bar***\n", "html": "

    foobar

    \n", - "example": 414, - "start_line": 6922, - "end_line": 6926, + "example": 415, + "start_line": 6933, + "end_line": 6937, "section": "Emphasis and strong emphasis" }, { "markdown": "foo***bar***baz\n", "html": "

    foobarbaz

    \n", - "example": 415, - "start_line": 6933, - "end_line": 6937, + "example": 416, + "start_line": 6944, + "end_line": 6948, "section": "Emphasis and strong emphasis" }, { "markdown": "foo******bar*********baz\n", "html": "

    foobar***baz

    \n", - "example": 416, - "start_line": 6939, - "end_line": 6943, + "example": 417, + "start_line": 6950, + "end_line": 6954, "section": "Emphasis and strong emphasis" }, { "markdown": "*foo **bar *baz* bim** bop*\n", "html": "

    foo bar baz bim bop

    \n", - "example": 417, - "start_line": 6948, - "end_line": 6952, + "example": 418, + "start_line": 6959, + "end_line": 6963, "section": "Emphasis and strong emphasis" }, { "markdown": "*foo [*bar*](/url)*\n", "html": "

    foo bar

    \n", - "example": 418, - "start_line": 6955, - "end_line": 6959, + "example": 419, + "start_line": 6966, + "end_line": 6970, "section": "Emphasis and strong emphasis" }, { "markdown": "** is not an empty emphasis\n", "html": "

    ** is not an empty emphasis

    \n", - "example": 419, - "start_line": 6964, - "end_line": 6968, + "example": 420, + "start_line": 6975, + "end_line": 6979, "section": "Emphasis and strong emphasis" }, { "markdown": "**** is not an empty strong emphasis\n", "html": "

    **** is not an empty strong emphasis

    \n", - "example": 420, - "start_line": 6971, - "end_line": 6975, + "example": 421, + "start_line": 6982, + "end_line": 6986, "section": "Emphasis and strong emphasis" }, { "markdown": "**foo [bar](/url)**\n", "html": "

    foo bar

    \n", - "example": 421, - "start_line": 6984, - "end_line": 6988, + "example": 422, + "start_line": 6995, + "end_line": 6999, "section": "Emphasis and strong emphasis" }, { "markdown": "**foo\nbar**\n", "html": "

    foo\nbar

    \n", - "example": 422, - "start_line": 6991, - "end_line": 6997, + "example": 423, + "start_line": 7002, + "end_line": 7008, "section": "Emphasis and strong emphasis" }, { "markdown": "__foo _bar_ baz__\n", "html": "

    foo bar baz

    \n", - "example": 423, - "start_line": 7003, - "end_line": 7007, + "example": 424, + "start_line": 7014, + "end_line": 7018, "section": "Emphasis and strong emphasis" }, { "markdown": "__foo __bar__ baz__\n", "html": "

    foo bar baz

    \n", - "example": 424, - "start_line": 7010, - "end_line": 7014, + "example": 425, + "start_line": 7021, + "end_line": 7025, "section": "Emphasis and strong emphasis" }, { "markdown": "____foo__ bar__\n", "html": "

    foo bar

    \n", - "example": 425, - "start_line": 7017, - "end_line": 7021, + "example": 426, + "start_line": 7028, + "end_line": 7032, "section": "Emphasis and strong emphasis" }, { "markdown": "**foo **bar****\n", "html": "

    foo bar

    \n", - "example": 426, - "start_line": 7024, - "end_line": 7028, + "example": 427, + "start_line": 7035, + "end_line": 7039, "section": "Emphasis and strong emphasis" }, { "markdown": "**foo *bar* baz**\n", "html": "

    foo bar baz

    \n", - "example": 427, - "start_line": 7031, - "end_line": 7035, + "example": 428, + "start_line": 7042, + "end_line": 7046, "section": "Emphasis and strong emphasis" }, { "markdown": "**foo*bar*baz**\n", "html": "

    foobarbaz

    \n", - "example": 428, - "start_line": 7038, - "end_line": 7042, + "example": 429, + "start_line": 7049, + "end_line": 7053, "section": "Emphasis and strong emphasis" }, { "markdown": "***foo* bar**\n", "html": "

    foo bar

    \n", - "example": 429, - "start_line": 7045, - "end_line": 7049, + "example": 430, + "start_line": 7056, + "end_line": 7060, "section": "Emphasis and strong emphasis" }, { "markdown": "**foo *bar***\n", "html": "

    foo bar

    \n", - "example": 430, - "start_line": 7052, - "end_line": 7056, + "example": 431, + "start_line": 7063, + "end_line": 7067, "section": "Emphasis and strong emphasis" }, { "markdown": "**foo *bar **baz**\nbim* bop**\n", "html": "

    foo bar baz\nbim bop

    \n", - "example": 431, - "start_line": 7061, - "end_line": 7067, + "example": 432, + "start_line": 7072, + "end_line": 7078, "section": "Emphasis and strong emphasis" }, { "markdown": "**foo [*bar*](/url)**\n", "html": "

    foo bar

    \n", - "example": 432, - "start_line": 7070, - "end_line": 7074, + "example": 433, + "start_line": 7081, + "end_line": 7085, "section": "Emphasis and strong emphasis" }, { "markdown": "__ is not an empty emphasis\n", "html": "

    __ is not an empty emphasis

    \n", - "example": 433, - "start_line": 7079, - "end_line": 7083, + "example": 434, + "start_line": 7090, + "end_line": 7094, "section": "Emphasis and strong emphasis" }, { "markdown": "____ is not an empty strong emphasis\n", "html": "

    ____ is not an empty strong emphasis

    \n", - "example": 434, - "start_line": 7086, - "end_line": 7090, + "example": 435, + "start_line": 7097, + "end_line": 7101, "section": "Emphasis and strong emphasis" }, { "markdown": "foo ***\n", "html": "

    foo ***

    \n", - "example": 435, - "start_line": 7096, - "end_line": 7100, + "example": 436, + "start_line": 7107, + "end_line": 7111, "section": "Emphasis and strong emphasis" }, { "markdown": "foo *\\**\n", "html": "

    foo *

    \n", - "example": 436, - "start_line": 7103, - "end_line": 7107, + "example": 437, + "start_line": 7114, + "end_line": 7118, "section": "Emphasis and strong emphasis" }, { "markdown": "foo *_*\n", "html": "

    foo _

    \n", - "example": 437, - "start_line": 7110, - "end_line": 7114, + "example": 438, + "start_line": 7121, + "end_line": 7125, "section": "Emphasis and strong emphasis" }, { "markdown": "foo *****\n", "html": "

    foo *****

    \n", - "example": 438, - "start_line": 7117, - "end_line": 7121, + "example": 439, + "start_line": 7128, + "end_line": 7132, "section": "Emphasis and strong emphasis" }, { "markdown": "foo **\\***\n", "html": "

    foo *

    \n", - "example": 439, - "start_line": 7124, - "end_line": 7128, + "example": 440, + "start_line": 7135, + "end_line": 7139, "section": "Emphasis and strong emphasis" }, { "markdown": "foo **_**\n", "html": "

    foo _

    \n", - "example": 440, - "start_line": 7131, - "end_line": 7135, + "example": 441, + "start_line": 7142, + "end_line": 7146, "section": "Emphasis and strong emphasis" }, { "markdown": "**foo*\n", "html": "

    *foo

    \n", - "example": 441, - "start_line": 7142, - "end_line": 7146, + "example": 442, + "start_line": 7153, + "end_line": 7157, "section": "Emphasis and strong emphasis" }, { "markdown": "*foo**\n", "html": "

    foo*

    \n", - "example": 442, - "start_line": 7149, - "end_line": 7153, + "example": 443, + "start_line": 7160, + "end_line": 7164, "section": "Emphasis and strong emphasis" }, { "markdown": "***foo**\n", "html": "

    *foo

    \n", - "example": 443, - "start_line": 7156, - "end_line": 7160, + "example": 444, + "start_line": 7167, + "end_line": 7171, "section": "Emphasis and strong emphasis" }, { "markdown": "****foo*\n", "html": "

    ***foo

    \n", - "example": 444, - "start_line": 7163, - "end_line": 7167, + "example": 445, + "start_line": 7174, + "end_line": 7178, "section": "Emphasis and strong emphasis" }, { "markdown": "**foo***\n", "html": "

    foo*

    \n", - "example": 445, - "start_line": 7170, - "end_line": 7174, + "example": 446, + "start_line": 7181, + "end_line": 7185, "section": "Emphasis and strong emphasis" }, { "markdown": "*foo****\n", "html": "

    foo***

    \n", - "example": 446, - "start_line": 7177, - "end_line": 7181, + "example": 447, + "start_line": 7188, + "end_line": 7192, "section": "Emphasis and strong emphasis" }, { "markdown": "foo ___\n", "html": "

    foo ___

    \n", - "example": 447, - "start_line": 7187, - "end_line": 7191, + "example": 448, + "start_line": 7198, + "end_line": 7202, "section": "Emphasis and strong emphasis" }, { "markdown": "foo _\\__\n", "html": "

    foo _

    \n", - "example": 448, - "start_line": 7194, - "end_line": 7198, + "example": 449, + "start_line": 7205, + "end_line": 7209, "section": "Emphasis and strong emphasis" }, { "markdown": "foo _*_\n", "html": "

    foo *

    \n", - "example": 449, - "start_line": 7201, - "end_line": 7205, + "example": 450, + "start_line": 7212, + "end_line": 7216, "section": "Emphasis and strong emphasis" }, { "markdown": "foo _____\n", "html": "

    foo _____

    \n", - "example": 450, - "start_line": 7208, - "end_line": 7212, + "example": 451, + "start_line": 7219, + "end_line": 7223, "section": "Emphasis and strong emphasis" }, { "markdown": "foo __\\___\n", "html": "

    foo _

    \n", - "example": 451, - "start_line": 7215, - "end_line": 7219, + "example": 452, + "start_line": 7226, + "end_line": 7230, "section": "Emphasis and strong emphasis" }, { "markdown": "foo __*__\n", "html": "

    foo *

    \n", - "example": 452, - "start_line": 7222, - "end_line": 7226, + "example": 453, + "start_line": 7233, + "end_line": 7237, "section": "Emphasis and strong emphasis" }, { "markdown": "__foo_\n", "html": "

    _foo

    \n", - "example": 453, - "start_line": 7229, - "end_line": 7233, + "example": 454, + "start_line": 7240, + "end_line": 7244, "section": "Emphasis and strong emphasis" }, { "markdown": "_foo__\n", "html": "

    foo_

    \n", - "example": 454, - "start_line": 7240, - "end_line": 7244, + "example": 455, + "start_line": 7251, + "end_line": 7255, "section": "Emphasis and strong emphasis" }, { "markdown": "___foo__\n", "html": "

    _foo

    \n", - "example": 455, - "start_line": 7247, - "end_line": 7251, + "example": 456, + "start_line": 7258, + "end_line": 7262, "section": "Emphasis and strong emphasis" }, { "markdown": "____foo_\n", "html": "

    ___foo

    \n", - "example": 456, - "start_line": 7254, - "end_line": 7258, + "example": 457, + "start_line": 7265, + "end_line": 7269, "section": "Emphasis and strong emphasis" }, { "markdown": "__foo___\n", "html": "

    foo_

    \n", - "example": 457, - "start_line": 7261, - "end_line": 7265, + "example": 458, + "start_line": 7272, + "end_line": 7276, "section": "Emphasis and strong emphasis" }, { "markdown": "_foo____\n", "html": "

    foo___

    \n", - "example": 458, - "start_line": 7268, - "end_line": 7272, + "example": 459, + "start_line": 7279, + "end_line": 7283, "section": "Emphasis and strong emphasis" }, { "markdown": "**foo**\n", "html": "

    foo

    \n", - "example": 459, - "start_line": 7278, - "end_line": 7282, + "example": 460, + "start_line": 7289, + "end_line": 7293, "section": "Emphasis and strong emphasis" }, { "markdown": "*_foo_*\n", "html": "

    foo

    \n", - "example": 460, - "start_line": 7285, - "end_line": 7289, + "example": 461, + "start_line": 7296, + "end_line": 7300, "section": "Emphasis and strong emphasis" }, { "markdown": "__foo__\n", "html": "

    foo

    \n", - "example": 461, - "start_line": 7292, - "end_line": 7296, + "example": 462, + "start_line": 7303, + "end_line": 7307, "section": "Emphasis and strong emphasis" }, { "markdown": "_*foo*_\n", "html": "

    foo

    \n", - "example": 462, - "start_line": 7299, - "end_line": 7303, + "example": 463, + "start_line": 7310, + "end_line": 7314, "section": "Emphasis and strong emphasis" }, { "markdown": "****foo****\n", "html": "

    foo

    \n", - "example": 463, - "start_line": 7309, - "end_line": 7313, + "example": 464, + "start_line": 7320, + "end_line": 7324, "section": "Emphasis and strong emphasis" }, { "markdown": "____foo____\n", "html": "

    foo

    \n", - "example": 464, - "start_line": 7316, - "end_line": 7320, + "example": 465, + "start_line": 7327, + "end_line": 7331, "section": "Emphasis and strong emphasis" }, { "markdown": "******foo******\n", "html": "

    foo

    \n", - "example": 465, - "start_line": 7327, - "end_line": 7331, + "example": 466, + "start_line": 7338, + "end_line": 7342, "section": "Emphasis and strong emphasis" }, { "markdown": "***foo***\n", "html": "

    foo

    \n", - "example": 466, - "start_line": 7336, - "end_line": 7340, + "example": 467, + "start_line": 7347, + "end_line": 7351, "section": "Emphasis and strong emphasis" }, { "markdown": "_____foo_____\n", "html": "

    foo

    \n", - "example": 467, - "start_line": 7343, - "end_line": 7347, + "example": 468, + "start_line": 7354, + "end_line": 7358, "section": "Emphasis and strong emphasis" }, { "markdown": "*foo _bar* baz_\n", "html": "

    foo _bar baz_

    \n", - "example": 468, - "start_line": 7352, - "end_line": 7356, + "example": 469, + "start_line": 7363, + "end_line": 7367, "section": "Emphasis and strong emphasis" }, { "markdown": "*foo __bar *baz bim__ bam*\n", "html": "

    foo bar *baz bim bam

    \n", - "example": 469, - "start_line": 7359, - "end_line": 7363, + "example": 470, + "start_line": 7370, + "end_line": 7374, "section": "Emphasis and strong emphasis" }, { "markdown": "**foo **bar baz**\n", "html": "

    **foo bar baz

    \n", - "example": 470, - "start_line": 7368, - "end_line": 7372, + "example": 471, + "start_line": 7379, + "end_line": 7383, "section": "Emphasis and strong emphasis" }, { "markdown": "*foo *bar baz*\n", "html": "

    *foo bar baz

    \n", - "example": 471, - "start_line": 7375, - "end_line": 7379, + "example": 472, + "start_line": 7386, + "end_line": 7390, "section": "Emphasis and strong emphasis" }, { "markdown": "*[bar*](/url)\n", "html": "

    *bar*

    \n", - "example": 472, - "start_line": 7384, - "end_line": 7388, + "example": 473, + "start_line": 7395, + "end_line": 7399, "section": "Emphasis and strong emphasis" }, { "markdown": "_foo [bar_](/url)\n", "html": "

    _foo bar_

    \n", - "example": 473, - "start_line": 7391, - "end_line": 7395, + "example": 474, + "start_line": 7402, + "end_line": 7406, "section": "Emphasis and strong emphasis" }, { "markdown": "*\n", "html": "

    *

    \n", - "example": 474, - "start_line": 7398, - "end_line": 7402, + "example": 475, + "start_line": 7409, + "end_line": 7413, "section": "Emphasis and strong emphasis" }, { "markdown": "**\n", "html": "

    **

    \n", - "example": 475, - "start_line": 7405, - "end_line": 7409, + "example": 476, + "start_line": 7416, + "end_line": 7420, "section": "Emphasis and strong emphasis" }, { "markdown": "__\n", "html": "

    __

    \n", - "example": 476, - "start_line": 7412, - "end_line": 7416, + "example": 477, + "start_line": 7423, + "end_line": 7427, "section": "Emphasis and strong emphasis" }, { "markdown": "*a `*`*\n", "html": "

    a *

    \n", - "example": 477, - "start_line": 7419, - "end_line": 7423, + "example": 478, + "start_line": 7430, + "end_line": 7434, "section": "Emphasis and strong emphasis" }, { "markdown": "_a `_`_\n", "html": "

    a _

    \n", - "example": 478, - "start_line": 7426, - "end_line": 7430, + "example": 479, + "start_line": 7437, + "end_line": 7441, "section": "Emphasis and strong emphasis" }, { - "markdown": "**a\n", - "html": "

    **ahttp://foo.bar/?q=**

    \n", - "example": 479, - "start_line": 7433, - "end_line": 7437, + "markdown": "**a\n", + "html": "

    **ahttps://foo.bar/?q=**

    \n", + "example": 480, + "start_line": 7444, + "end_line": 7448, "section": "Emphasis and strong emphasis" }, { - "markdown": "__a\n", - "html": "

    __ahttp://foo.bar/?q=__

    \n", - "example": 480, - "start_line": 7440, - "end_line": 7444, + "markdown": "__a\n", + "html": "

    __ahttps://foo.bar/?q=__

    \n", + "example": 481, + "start_line": 7451, + "end_line": 7455, "section": "Emphasis and strong emphasis" }, { "markdown": "[link](/uri \"title\")\n", "html": "

    link

    \n", - "example": 481, - "start_line": 7528, - "end_line": 7532, + "example": 482, + "start_line": 7539, + "end_line": 7543, "section": "Links" }, { "markdown": "[link](/uri)\n", "html": "

    link

    \n", - "example": 482, - "start_line": 7538, - "end_line": 7542, + "example": 483, + "start_line": 7549, + "end_line": 7553, "section": "Links" }, { "markdown": "[](./target.md)\n", "html": "

    \n", - "example": 483, - "start_line": 7544, - "end_line": 7548, + "example": 484, + "start_line": 7555, + "end_line": 7559, "section": "Links" }, { "markdown": "[link]()\n", "html": "

    link

    \n", - "example": 484, - "start_line": 7551, - "end_line": 7555, + "example": 485, + "start_line": 7562, + "end_line": 7566, "section": "Links" }, { "markdown": "[link](<>)\n", "html": "

    link

    \n", - "example": 485, - "start_line": 7558, - "end_line": 7562, + "example": 486, + "start_line": 7569, + "end_line": 7573, "section": "Links" }, { "markdown": "[]()\n", "html": "

    \n", - "example": 486, - "start_line": 7565, - "end_line": 7569, + "example": 487, + "start_line": 7576, + "end_line": 7580, "section": "Links" }, { "markdown": "[link](/my uri)\n", "html": "

    [link](/my uri)

    \n", - "example": 487, - "start_line": 7574, - "end_line": 7578, + "example": 488, + "start_line": 7585, + "end_line": 7589, "section": "Links" }, { "markdown": "[link](
    )\n", "html": "

    link

    \n", - "example": 488, - "start_line": 7580, - "end_line": 7584, + "example": 489, + "start_line": 7591, + "end_line": 7595, "section": "Links" }, { "markdown": "[link](foo\nbar)\n", "html": "

    [link](foo\nbar)

    \n", - "example": 489, - "start_line": 7589, - "end_line": 7595, + "example": 490, + "start_line": 7600, + "end_line": 7606, "section": "Links" }, { "markdown": "[link]()\n", "html": "

    [link]()

    \n", - "example": 490, - "start_line": 7597, - "end_line": 7603, + "example": 491, + "start_line": 7608, + "end_line": 7614, "section": "Links" }, { "markdown": "[a]()\n", "html": "

    a

    \n", - "example": 491, - "start_line": 7608, - "end_line": 7612, + "example": 492, + "start_line": 7619, + "end_line": 7623, "section": "Links" }, { "markdown": "[link]()\n", "html": "

    [link](<foo>)

    \n", - "example": 492, - "start_line": 7616, - "end_line": 7620, + "example": 493, + "start_line": 7627, + "end_line": 7631, "section": "Links" }, { "markdown": "[a](\n[a](c)\n", "html": "

    [a](<b)c\n[a](<b)c>\n[a](c)

    \n", - "example": 493, - "start_line": 7625, - "end_line": 7633, + "example": 494, + "start_line": 7636, + "end_line": 7644, "section": "Links" }, { "markdown": "[link](\\(foo\\))\n", "html": "

    link

    \n", - "example": 494, - "start_line": 7637, - "end_line": 7641, + "example": 495, + "start_line": 7648, + "end_line": 7652, "section": "Links" }, { "markdown": "[link](foo(and(bar)))\n", "html": "

    link

    \n", - "example": 495, - "start_line": 7646, - "end_line": 7650, + "example": 496, + "start_line": 7657, + "end_line": 7661, "section": "Links" }, { "markdown": "[link](foo(and(bar))\n", "html": "

    [link](foo(and(bar))

    \n", - "example": 496, - "start_line": 7655, - "end_line": 7659, + "example": 497, + "start_line": 7666, + "end_line": 7670, "section": "Links" }, { "markdown": "[link](foo\\(and\\(bar\\))\n", "html": "

    link

    \n", - "example": 497, - "start_line": 7662, - "end_line": 7666, + "example": 498, + "start_line": 7673, + "end_line": 7677, "section": "Links" }, { "markdown": "[link]()\n", "html": "

    link

    \n", - "example": 498, - "start_line": 7669, - "end_line": 7673, + "example": 499, + "start_line": 7680, + "end_line": 7684, "section": "Links" }, { "markdown": "[link](foo\\)\\:)\n", "html": "

    link

    \n", - "example": 499, - "start_line": 7679, - "end_line": 7683, + "example": 500, + "start_line": 7690, + "end_line": 7694, "section": "Links" }, { - "markdown": "[link](#fragment)\n\n[link](http://example.com#fragment)\n\n[link](http://example.com?foo=3#frag)\n", - "html": "

    link

    \n

    link

    \n

    link

    \n", - "example": 500, - "start_line": 7688, - "end_line": 7698, + "markdown": "[link](#fragment)\n\n[link](https://example.com#fragment)\n\n[link](https://example.com?foo=3#frag)\n", + "html": "

    link

    \n

    link

    \n

    link

    \n", + "example": 501, + "start_line": 7699, + "end_line": 7709, "section": "Links" }, { "markdown": "[link](foo\\bar)\n", "html": "

    link

    \n", - "example": 501, - "start_line": 7704, - "end_line": 7708, + "example": 502, + "start_line": 7715, + "end_line": 7719, "section": "Links" }, { "markdown": "[link](foo%20bä)\n", "html": "

    link

    \n", - "example": 502, - "start_line": 7720, - "end_line": 7724, + "example": 503, + "start_line": 7731, + "end_line": 7735, "section": "Links" }, { "markdown": "[link](\"title\")\n", "html": "

    link

    \n", - "example": 503, - "start_line": 7731, - "end_line": 7735, + "example": 504, + "start_line": 7742, + "end_line": 7746, "section": "Links" }, { "markdown": "[link](/url \"title\")\n[link](/url 'title')\n[link](/url (title))\n", "html": "

    link\nlink\nlink

    \n", - "example": 504, - "start_line": 7740, - "end_line": 7748, + "example": 505, + "start_line": 7751, + "end_line": 7759, "section": "Links" }, { "markdown": "[link](/url \"title \\\""\")\n", "html": "

    link

    \n", - "example": 505, - "start_line": 7754, - "end_line": 7758, + "example": 506, + "start_line": 7765, + "end_line": 7769, "section": "Links" }, { "markdown": "[link](/url \"title\")\n", "html": "

    link

    \n", - "example": 506, - "start_line": 7765, - "end_line": 7769, + "example": 507, + "start_line": 7776, + "end_line": 7780, "section": "Links" }, { "markdown": "[link](/url \"title \"and\" title\")\n", "html": "

    [link](/url "title "and" title")

    \n", - "example": 507, - "start_line": 7774, - "end_line": 7778, + "example": 508, + "start_line": 7785, + "end_line": 7789, "section": "Links" }, { "markdown": "[link](/url 'title \"and\" title')\n", "html": "

    link

    \n", - "example": 508, - "start_line": 7783, - "end_line": 7787, + "example": 509, + "start_line": 7794, + "end_line": 7798, "section": "Links" }, { "markdown": "[link]( /uri\n \"title\" )\n", "html": "

    link

    \n", - "example": 509, - "start_line": 7808, - "end_line": 7813, + "example": 510, + "start_line": 7819, + "end_line": 7824, "section": "Links" }, { "markdown": "[link] (/uri)\n", "html": "

    [link] (/uri)

    \n", - "example": 510, - "start_line": 7819, - "end_line": 7823, + "example": 511, + "start_line": 7830, + "end_line": 7834, "section": "Links" }, { "markdown": "[link [foo [bar]]](/uri)\n", "html": "

    link [foo [bar]]

    \n", - "example": 511, - "start_line": 7829, - "end_line": 7833, + "example": 512, + "start_line": 7840, + "end_line": 7844, "section": "Links" }, { "markdown": "[link] bar](/uri)\n", "html": "

    [link] bar](/uri)

    \n", - "example": 512, - "start_line": 7836, - "end_line": 7840, + "example": 513, + "start_line": 7847, + "end_line": 7851, "section": "Links" }, { "markdown": "[link [bar](/uri)\n", "html": "

    [link bar

    \n", - "example": 513, - "start_line": 7843, - "end_line": 7847, + "example": 514, + "start_line": 7854, + "end_line": 7858, "section": "Links" }, { "markdown": "[link \\[bar](/uri)\n", "html": "

    link [bar

    \n", - "example": 514, - "start_line": 7850, - "end_line": 7854, + "example": 515, + "start_line": 7861, + "end_line": 7865, "section": "Links" }, { "markdown": "[link *foo **bar** `#`*](/uri)\n", "html": "

    link foo bar #

    \n", - "example": 515, - "start_line": 7859, - "end_line": 7863, + "example": 516, + "start_line": 7870, + "end_line": 7874, "section": "Links" }, { "markdown": "[![moon](moon.jpg)](/uri)\n", "html": "

    \"moon\"

    \n", - "example": 516, - "start_line": 7866, - "end_line": 7870, + "example": 517, + "start_line": 7877, + "end_line": 7881, "section": "Links" }, { "markdown": "[foo [bar](/uri)](/uri)\n", "html": "

    [foo bar](/uri)

    \n", - "example": 517, - "start_line": 7875, - "end_line": 7879, + "example": 518, + "start_line": 7886, + "end_line": 7890, "section": "Links" }, { "markdown": "[foo *[bar [baz](/uri)](/uri)*](/uri)\n", "html": "

    [foo [bar baz](/uri)](/uri)

    \n", - "example": 518, - "start_line": 7882, - "end_line": 7886, + "example": 519, + "start_line": 7893, + "end_line": 7897, "section": "Links" }, { "markdown": "![[[foo](uri1)](uri2)](uri3)\n", "html": "

    \"[foo](uri2)\"

    \n", - "example": 519, - "start_line": 7889, - "end_line": 7893, + "example": 520, + "start_line": 7900, + "end_line": 7904, "section": "Links" }, { "markdown": "*[foo*](/uri)\n", "html": "

    *foo*

    \n", - "example": 520, - "start_line": 7899, - "end_line": 7903, + "example": 521, + "start_line": 7910, + "end_line": 7914, "section": "Links" }, { "markdown": "[foo *bar](baz*)\n", "html": "

    foo *bar

    \n", - "example": 521, - "start_line": 7906, - "end_line": 7910, + "example": 522, + "start_line": 7917, + "end_line": 7921, "section": "Links" }, { "markdown": "*foo [bar* baz]\n", "html": "

    foo [bar baz]

    \n", - "example": 522, - "start_line": 7916, - "end_line": 7920, + "example": 523, + "start_line": 7927, + "end_line": 7931, "section": "Links" }, { "markdown": "[foo \n", "html": "

    [foo

    \n", - "example": 523, - "start_line": 7926, - "end_line": 7930, + "example": 524, + "start_line": 7937, + "end_line": 7941, "section": "Links" }, { "markdown": "[foo`](/uri)`\n", "html": "

    [foo](/uri)

    \n", - "example": 524, - "start_line": 7933, - "end_line": 7937, + "example": 525, + "start_line": 7944, + "end_line": 7948, "section": "Links" }, { - "markdown": "[foo\n", - "html": "

    [foohttp://example.com/?search=](uri)

    \n", - "example": 525, - "start_line": 7940, - "end_line": 7944, + "markdown": "[foo\n", + "html": "

    [foohttps://example.com/?search=](uri)

    \n", + "example": 526, + "start_line": 7951, + "end_line": 7955, "section": "Links" }, { "markdown": "[foo][bar]\n\n[bar]: /url \"title\"\n", "html": "

    foo

    \n", - "example": 526, - "start_line": 7978, - "end_line": 7984, + "example": 527, + "start_line": 7989, + "end_line": 7995, "section": "Links" }, { "markdown": "[link [foo [bar]]][ref]\n\n[ref]: /uri\n", "html": "

    link [foo [bar]]

    \n", - "example": 527, - "start_line": 7993, - "end_line": 7999, + "example": 528, + "start_line": 8004, + "end_line": 8010, "section": "Links" }, { "markdown": "[link \\[bar][ref]\n\n[ref]: /uri\n", "html": "

    link [bar

    \n", - "example": 528, - "start_line": 8002, - "end_line": 8008, + "example": 529, + "start_line": 8013, + "end_line": 8019, "section": "Links" }, { "markdown": "[link *foo **bar** `#`*][ref]\n\n[ref]: /uri\n", "html": "

    link foo bar #

    \n", - "example": 529, - "start_line": 8013, - "end_line": 8019, + "example": 530, + "start_line": 8024, + "end_line": 8030, "section": "Links" }, { "markdown": "[![moon](moon.jpg)][ref]\n\n[ref]: /uri\n", "html": "

    \"moon\"

    \n", - "example": 530, - "start_line": 8022, - "end_line": 8028, + "example": 531, + "start_line": 8033, + "end_line": 8039, "section": "Links" }, { "markdown": "[foo [bar](/uri)][ref]\n\n[ref]: /uri\n", "html": "

    [foo bar]ref

    \n", - "example": 531, - "start_line": 8033, - "end_line": 8039, + "example": 532, + "start_line": 8044, + "end_line": 8050, "section": "Links" }, { "markdown": "[foo *bar [baz][ref]*][ref]\n\n[ref]: /uri\n", "html": "

    [foo bar baz]ref

    \n", - "example": 532, - "start_line": 8042, - "end_line": 8048, + "example": 533, + "start_line": 8053, + "end_line": 8059, "section": "Links" }, { "markdown": "*[foo*][ref]\n\n[ref]: /uri\n", "html": "

    *foo*

    \n", - "example": 533, - "start_line": 8057, - "end_line": 8063, + "example": 534, + "start_line": 8068, + "end_line": 8074, "section": "Links" }, { "markdown": "[foo *bar][ref]*\n\n[ref]: /uri\n", "html": "

    foo *bar*

    \n", - "example": 534, - "start_line": 8066, - "end_line": 8072, + "example": 535, + "start_line": 8077, + "end_line": 8083, "section": "Links" }, { "markdown": "[foo \n\n[ref]: /uri\n", "html": "

    [foo

    \n", - "example": 535, - "start_line": 8078, - "end_line": 8084, + "example": 536, + "start_line": 8089, + "end_line": 8095, "section": "Links" }, { "markdown": "[foo`][ref]`\n\n[ref]: /uri\n", "html": "

    [foo][ref]

    \n", - "example": 536, - "start_line": 8087, - "end_line": 8093, - "section": "Links" - }, - { - "markdown": "[foo\n\n[ref]: /uri\n", - "html": "

    [foohttp://example.com/?search=][ref]

    \n", "example": 537, - "start_line": 8096, - "end_line": 8102, + "start_line": 8098, + "end_line": 8104, "section": "Links" }, { - "markdown": "[foo][BaR]\n\n[bar]: /url \"title\"\n", - "html": "

    foo

    \n", + "markdown": "[foo\n\n[ref]: /uri\n", + "html": "

    [foohttps://example.com/?search=][ref]

    \n", "example": 538, "start_line": 8107, "end_line": 8113, "section": "Links" }, { - "markdown": "[ẞ]\n\n[SS]: /url\n", - "html": "

    \n", + "markdown": "[foo][BaR]\n\n[bar]: /url \"title\"\n", + "html": "

    foo

    \n", "example": 539, "start_line": 8118, "end_line": 8124, "section": "Links" }, + { + "markdown": "[ẞ]\n\n[SS]: /url\n", + "html": "

    \n", + "example": 540, + "start_line": 8129, + "end_line": 8135, + "section": "Links" + }, { "markdown": "[Foo\n bar]: /url\n\n[Baz][Foo bar]\n", "html": "

    Baz

    \n", - "example": 540, - "start_line": 8130, - "end_line": 8137, + "example": 541, + "start_line": 8141, + "end_line": 8148, "section": "Links" }, { "markdown": "[foo] [bar]\n\n[bar]: /url \"title\"\n", "html": "

    [foo] bar

    \n", - "example": 541, - "start_line": 8143, - "end_line": 8149, + "example": 542, + "start_line": 8154, + "end_line": 8160, "section": "Links" }, { "markdown": "[foo]\n[bar]\n\n[bar]: /url \"title\"\n", "html": "

    [foo]\nbar

    \n", - "example": 542, - "start_line": 8152, - "end_line": 8160, + "example": 543, + "start_line": 8163, + "end_line": 8171, "section": "Links" }, { "markdown": "[foo]: /url1\n\n[foo]: /url2\n\n[bar][foo]\n", "html": "

    bar

    \n", - "example": 543, - "start_line": 8193, - "end_line": 8201, + "example": 544, + "start_line": 8204, + "end_line": 8212, "section": "Links" }, { "markdown": "[bar][foo\\!]\n\n[foo!]: /url\n", "html": "

    [bar][foo!]

    \n", - "example": 544, - "start_line": 8208, - "end_line": 8214, + "example": 545, + "start_line": 8219, + "end_line": 8225, "section": "Links" }, { "markdown": "[foo][ref[]\n\n[ref[]: /uri\n", "html": "

    [foo][ref[]

    \n

    [ref[]: /uri

    \n", - "example": 545, - "start_line": 8220, - "end_line": 8227, + "example": 546, + "start_line": 8231, + "end_line": 8238, "section": "Links" }, { "markdown": "[foo][ref[bar]]\n\n[ref[bar]]: /uri\n", "html": "

    [foo][ref[bar]]

    \n

    [ref[bar]]: /uri

    \n", - "example": 546, - "start_line": 8230, - "end_line": 8237, + "example": 547, + "start_line": 8241, + "end_line": 8248, "section": "Links" }, { "markdown": "[[[foo]]]\n\n[[[foo]]]: /url\n", "html": "

    [[[foo]]]

    \n

    [[[foo]]]: /url

    \n", - "example": 547, - "start_line": 8240, - "end_line": 8247, + "example": 548, + "start_line": 8251, + "end_line": 8258, "section": "Links" }, { "markdown": "[foo][ref\\[]\n\n[ref\\[]: /uri\n", "html": "

    foo

    \n", - "example": 548, - "start_line": 8250, - "end_line": 8256, + "example": 549, + "start_line": 8261, + "end_line": 8267, "section": "Links" }, { "markdown": "[bar\\\\]: /uri\n\n[bar\\\\]\n", "html": "

    bar\\

    \n", - "example": 549, - "start_line": 8261, - "end_line": 8267, + "example": 550, + "start_line": 8272, + "end_line": 8278, "section": "Links" }, { "markdown": "[]\n\n[]: /uri\n", "html": "

    []

    \n

    []: /uri

    \n", - "example": 550, - "start_line": 8273, - "end_line": 8280, + "example": 551, + "start_line": 8284, + "end_line": 8291, "section": "Links" }, { "markdown": "[\n ]\n\n[\n ]: /uri\n", "html": "

    [\n]

    \n

    [\n]: /uri

    \n", - "example": 551, - "start_line": 8283, - "end_line": 8294, + "example": 552, + "start_line": 8294, + "end_line": 8305, "section": "Links" }, { "markdown": "[foo][]\n\n[foo]: /url \"title\"\n", "html": "

    foo

    \n", - "example": 552, - "start_line": 8306, - "end_line": 8312, + "example": 553, + "start_line": 8317, + "end_line": 8323, "section": "Links" }, { "markdown": "[*foo* bar][]\n\n[*foo* bar]: /url \"title\"\n", "html": "

    foo bar

    \n", - "example": 553, - "start_line": 8315, - "end_line": 8321, + "example": 554, + "start_line": 8326, + "end_line": 8332, "section": "Links" }, { "markdown": "[Foo][]\n\n[foo]: /url \"title\"\n", "html": "

    Foo

    \n", - "example": 554, - "start_line": 8326, - "end_line": 8332, + "example": 555, + "start_line": 8337, + "end_line": 8343, "section": "Links" }, { "markdown": "[foo] \n[]\n\n[foo]: /url \"title\"\n", "html": "

    foo\n[]

    \n", - "example": 555, - "start_line": 8339, - "end_line": 8347, + "example": 556, + "start_line": 8350, + "end_line": 8358, "section": "Links" }, { "markdown": "[foo]\n\n[foo]: /url \"title\"\n", "html": "

    foo

    \n", - "example": 556, - "start_line": 8359, - "end_line": 8365, + "example": 557, + "start_line": 8370, + "end_line": 8376, "section": "Links" }, { "markdown": "[*foo* bar]\n\n[*foo* bar]: /url \"title\"\n", "html": "

    foo bar

    \n", - "example": 557, - "start_line": 8368, - "end_line": 8374, + "example": 558, + "start_line": 8379, + "end_line": 8385, "section": "Links" }, { "markdown": "[[*foo* bar]]\n\n[*foo* bar]: /url \"title\"\n", "html": "

    [foo bar]

    \n", - "example": 558, - "start_line": 8377, - "end_line": 8383, + "example": 559, + "start_line": 8388, + "end_line": 8394, "section": "Links" }, { "markdown": "[[bar [foo]\n\n[foo]: /url\n", "html": "

    [[bar foo

    \n", - "example": 559, - "start_line": 8386, - "end_line": 8392, + "example": 560, + "start_line": 8397, + "end_line": 8403, "section": "Links" }, { "markdown": "[Foo]\n\n[foo]: /url \"title\"\n", "html": "

    Foo

    \n", - "example": 560, - "start_line": 8397, - "end_line": 8403, + "example": 561, + "start_line": 8408, + "end_line": 8414, "section": "Links" }, { "markdown": "[foo] bar\n\n[foo]: /url\n", "html": "

    foo bar

    \n", - "example": 561, - "start_line": 8408, - "end_line": 8414, + "example": 562, + "start_line": 8419, + "end_line": 8425, "section": "Links" }, { "markdown": "\\[foo]\n\n[foo]: /url \"title\"\n", "html": "

    [foo]

    \n", - "example": 562, - "start_line": 8420, - "end_line": 8426, + "example": 563, + "start_line": 8431, + "end_line": 8437, "section": "Links" }, { "markdown": "[foo*]: /url\n\n*[foo*]\n", "html": "

    *foo*

    \n", - "example": 563, - "start_line": 8432, - "end_line": 8438, + "example": 564, + "start_line": 8443, + "end_line": 8449, "section": "Links" }, { "markdown": "[foo][bar]\n\n[foo]: /url1\n[bar]: /url2\n", "html": "

    foo

    \n", - "example": 564, - "start_line": 8444, - "end_line": 8451, + "example": 565, + "start_line": 8455, + "end_line": 8462, "section": "Links" }, { "markdown": "[foo][]\n\n[foo]: /url1\n", "html": "

    foo

    \n", - "example": 565, - "start_line": 8453, - "end_line": 8459, + "example": 566, + "start_line": 8464, + "end_line": 8470, "section": "Links" }, { "markdown": "[foo]()\n\n[foo]: /url1\n", "html": "

    foo

    \n", - "example": 566, - "start_line": 8463, - "end_line": 8469, + "example": 567, + "start_line": 8474, + "end_line": 8480, "section": "Links" }, { "markdown": "[foo](not a link)\n\n[foo]: /url1\n", "html": "

    foo(not a link)

    \n", - "example": 567, - "start_line": 8471, - "end_line": 8477, + "example": 568, + "start_line": 8482, + "end_line": 8488, "section": "Links" }, { "markdown": "[foo][bar][baz]\n\n[baz]: /url\n", "html": "

    [foo]bar

    \n", - "example": 568, - "start_line": 8482, - "end_line": 8488, + "example": 569, + "start_line": 8493, + "end_line": 8499, "section": "Links" }, { "markdown": "[foo][bar][baz]\n\n[baz]: /url1\n[bar]: /url2\n", "html": "

    foobaz

    \n", - "example": 569, - "start_line": 8494, - "end_line": 8501, + "example": 570, + "start_line": 8505, + "end_line": 8512, "section": "Links" }, { "markdown": "[foo][bar][baz]\n\n[baz]: /url1\n[foo]: /url2\n", "html": "

    [foo]bar

    \n", - "example": 570, - "start_line": 8507, - "end_line": 8514, + "example": 571, + "start_line": 8518, + "end_line": 8525, "section": "Links" }, { "markdown": "![foo](/url \"title\")\n", "html": "

    \"foo\"

    \n", - "example": 571, - "start_line": 8530, - "end_line": 8534, + "example": 572, + "start_line": 8541, + "end_line": 8545, "section": "Images" }, { "markdown": "![foo *bar*]\n\n[foo *bar*]: train.jpg \"train & tracks\"\n", "html": "

    \"foo

    \n", - "example": 572, - "start_line": 8537, - "end_line": 8543, + "example": 573, + "start_line": 8548, + "end_line": 8554, "section": "Images" }, { "markdown": "![foo ![bar](/url)](/url2)\n", "html": "

    \"foo

    \n", - "example": 573, - "start_line": 8546, - "end_line": 8550, + "example": 574, + "start_line": 8557, + "end_line": 8561, "section": "Images" }, { "markdown": "![foo [bar](/url)](/url2)\n", "html": "

    \"foo

    \n", - "example": 574, - "start_line": 8553, - "end_line": 8557, + "example": 575, + "start_line": 8564, + "end_line": 8568, "section": "Images" }, { "markdown": "![foo *bar*][]\n\n[foo *bar*]: train.jpg \"train & tracks\"\n", "html": "

    \"foo

    \n", - "example": 575, - "start_line": 8567, - "end_line": 8573, + "example": 576, + "start_line": 8578, + "end_line": 8584, "section": "Images" }, { "markdown": "![foo *bar*][foobar]\n\n[FOOBAR]: train.jpg \"train & tracks\"\n", "html": "

    \"foo

    \n", - "example": 576, - "start_line": 8576, - "end_line": 8582, + "example": 577, + "start_line": 8587, + "end_line": 8593, "section": "Images" }, { "markdown": "![foo](train.jpg)\n", "html": "

    \"foo\"

    \n", - "example": 577, - "start_line": 8585, - "end_line": 8589, + "example": 578, + "start_line": 8596, + "end_line": 8600, "section": "Images" }, { "markdown": "My ![foo bar](/path/to/train.jpg \"title\" )\n", "html": "

    My \"foo

    \n", - "example": 578, - "start_line": 8592, - "end_line": 8596, + "example": 579, + "start_line": 8603, + "end_line": 8607, "section": "Images" }, { "markdown": "![foo]()\n", "html": "

    \"foo\"

    \n", - "example": 579, - "start_line": 8599, - "end_line": 8603, + "example": 580, + "start_line": 8610, + "end_line": 8614, "section": "Images" }, { "markdown": "![](/url)\n", "html": "

    \"\"

    \n", - "example": 580, - "start_line": 8606, - "end_line": 8610, + "example": 581, + "start_line": 8617, + "end_line": 8621, "section": "Images" }, { "markdown": "![foo][bar]\n\n[bar]: /url\n", "html": "

    \"foo\"

    \n", - "example": 581, - "start_line": 8615, - "end_line": 8621, + "example": 582, + "start_line": 8626, + "end_line": 8632, "section": "Images" }, { "markdown": "![foo][bar]\n\n[BAR]: /url\n", "html": "

    \"foo\"

    \n", - "example": 582, - "start_line": 8624, - "end_line": 8630, + "example": 583, + "start_line": 8635, + "end_line": 8641, "section": "Images" }, { "markdown": "![foo][]\n\n[foo]: /url \"title\"\n", "html": "

    \"foo\"

    \n", - "example": 583, - "start_line": 8635, - "end_line": 8641, + "example": 584, + "start_line": 8646, + "end_line": 8652, "section": "Images" }, { "markdown": "![*foo* bar][]\n\n[*foo* bar]: /url \"title\"\n", "html": "

    \"foo

    \n", - "example": 584, - "start_line": 8644, - "end_line": 8650, + "example": 585, + "start_line": 8655, + "end_line": 8661, "section": "Images" }, { "markdown": "![Foo][]\n\n[foo]: /url \"title\"\n", "html": "

    \"Foo\"

    \n", - "example": 585, - "start_line": 8655, - "end_line": 8661, + "example": 586, + "start_line": 8666, + "end_line": 8672, "section": "Images" }, { "markdown": "![foo] \n[]\n\n[foo]: /url \"title\"\n", "html": "

    \"foo\"\n[]

    \n", - "example": 586, - "start_line": 8667, - "end_line": 8675, + "example": 587, + "start_line": 8678, + "end_line": 8686, "section": "Images" }, { "markdown": "![foo]\n\n[foo]: /url \"title\"\n", "html": "

    \"foo\"

    \n", - "example": 587, - "start_line": 8680, - "end_line": 8686, + "example": 588, + "start_line": 8691, + "end_line": 8697, "section": "Images" }, { "markdown": "![*foo* bar]\n\n[*foo* bar]: /url \"title\"\n", "html": "

    \"foo

    \n", - "example": 588, - "start_line": 8689, - "end_line": 8695, + "example": 589, + "start_line": 8700, + "end_line": 8706, "section": "Images" }, { "markdown": "![[foo]]\n\n[[foo]]: /url \"title\"\n", "html": "

    ![[foo]]

    \n

    [[foo]]: /url "title"

    \n", - "example": 589, - "start_line": 8700, - "end_line": 8707, + "example": 590, + "start_line": 8711, + "end_line": 8718, "section": "Images" }, { "markdown": "![Foo]\n\n[foo]: /url \"title\"\n", "html": "

    \"Foo\"

    \n", - "example": 590, - "start_line": 8712, - "end_line": 8718, + "example": 591, + "start_line": 8723, + "end_line": 8729, "section": "Images" }, { "markdown": "!\\[foo]\n\n[foo]: /url \"title\"\n", "html": "

    ![foo]

    \n", - "example": 591, - "start_line": 8724, - "end_line": 8730, + "example": 592, + "start_line": 8735, + "end_line": 8741, "section": "Images" }, { "markdown": "\\![foo]\n\n[foo]: /url \"title\"\n", "html": "

    !foo

    \n", - "example": 592, - "start_line": 8736, - "end_line": 8742, + "example": 593, + "start_line": 8747, + "end_line": 8753, "section": "Images" }, { "markdown": "\n", "html": "

    http://foo.bar.baz

    \n", - "example": 593, - "start_line": 8769, - "end_line": 8773, + "example": 594, + "start_line": 8780, + "end_line": 8784, "section": "Autolinks" }, { - "markdown": "\n", - "html": "

    http://foo.bar.baz/test?q=hello&id=22&boolean

    \n", - "example": 594, - "start_line": 8776, - "end_line": 8780, + "markdown": "\n", + "html": "

    https://foo.bar.baz/test?q=hello&id=22&boolean

    \n", + "example": 595, + "start_line": 8787, + "end_line": 8791, "section": "Autolinks" }, { "markdown": "\n", "html": "

    irc://foo.bar:2233/baz

    \n", - "example": 595, - "start_line": 8783, - "end_line": 8787, + "example": 596, + "start_line": 8794, + "end_line": 8798, "section": "Autolinks" }, { "markdown": "\n", "html": "

    MAILTO:FOO@BAR.BAZ

    \n", - "example": 596, - "start_line": 8792, - "end_line": 8796, + "example": 597, + "start_line": 8803, + "end_line": 8807, "section": "Autolinks" }, { "markdown": "\n", "html": "

    a+b+c:d

    \n", - "example": 597, - "start_line": 8804, - "end_line": 8808, + "example": 598, + "start_line": 8815, + "end_line": 8819, "section": "Autolinks" }, { "markdown": "\n", "html": "

    made-up-scheme://foo,bar

    \n", - "example": 598, - "start_line": 8811, - "end_line": 8815, + "example": 599, + "start_line": 8822, + "end_line": 8826, "section": "Autolinks" }, { - "markdown": "\n", - "html": "

    http://../

    \n", - "example": 599, - "start_line": 8818, - "end_line": 8822, + "markdown": "\n", + "html": "

    https://../

    \n", + "example": 600, + "start_line": 8829, + "end_line": 8833, "section": "Autolinks" }, { "markdown": "\n", "html": "

    localhost:5001/foo

    \n", - "example": 600, - "start_line": 8825, - "end_line": 8829, + "example": 601, + "start_line": 8836, + "end_line": 8840, "section": "Autolinks" }, { - "markdown": "\n", - "html": "

    <http://foo.bar/baz bim>

    \n", - "example": 601, - "start_line": 8834, - "end_line": 8838, + "markdown": "\n", + "html": "

    <https://foo.bar/baz bim>

    \n", + "example": 602, + "start_line": 8845, + "end_line": 8849, "section": "Autolinks" }, { - "markdown": "\n", - "html": "

    http://example.com/\\[\\

    \n", - "example": 602, - "start_line": 8843, - "end_line": 8847, + "markdown": "\n", + "html": "

    https://example.com/\\[\\

    \n", + "example": 603, + "start_line": 8854, + "end_line": 8858, "section": "Autolinks" }, { "markdown": "\n", "html": "

    foo@bar.example.com

    \n", - "example": 603, - "start_line": 8865, - "end_line": 8869, + "example": 604, + "start_line": 8876, + "end_line": 8880, "section": "Autolinks" }, { "markdown": "\n", "html": "

    foo+special@Bar.baz-bar0.com

    \n", - "example": 604, - "start_line": 8872, - "end_line": 8876, + "example": 605, + "start_line": 8883, + "end_line": 8887, "section": "Autolinks" }, { "markdown": "\n", "html": "

    <foo+@bar.example.com>

    \n", - "example": 605, - "start_line": 8881, - "end_line": 8885, + "example": 606, + "start_line": 8892, + "end_line": 8896, "section": "Autolinks" }, { "markdown": "<>\n", "html": "

    <>

    \n", - "example": 606, - "start_line": 8890, - "end_line": 8894, + "example": 607, + "start_line": 8901, + "end_line": 8905, "section": "Autolinks" }, { - "markdown": "< http://foo.bar >\n", - "html": "

    < http://foo.bar >

    \n", - "example": 607, - "start_line": 8897, - "end_line": 8901, + "markdown": "< https://foo.bar >\n", + "html": "

    < https://foo.bar >

    \n", + "example": 608, + "start_line": 8908, + "end_line": 8912, "section": "Autolinks" }, { "markdown": "\n", "html": "

    <m:abc>

    \n", - "example": 608, - "start_line": 8904, - "end_line": 8908, + "example": 609, + "start_line": 8915, + "end_line": 8919, "section": "Autolinks" }, { "markdown": "\n", "html": "

    <foo.bar.baz>

    \n", - "example": 609, - "start_line": 8911, - "end_line": 8915, + "example": 610, + "start_line": 8922, + "end_line": 8926, "section": "Autolinks" }, { - "markdown": "http://example.com/n", - "html": "

    http://example.com

    \n", - "example": 610, - "start_line": 8918, - "end_line": 8922, + "markdown": "https://example.com/n", + "html": "

    https://example.com

    \n", + "example": 611, + "start_line": 8929, + "end_line": 8933, "section": "Autolinks" }, { "markdown": "foo@bar.example.com\n", "html": "

    foo@bar.example.com

    \n", - "example": 611, - "start_line": 8925, - "end_line": 8929, + "example": 612, + "start_line": 8936, + "end_line": 8940, "section": "Autolinks" }, { "markdown": "\n", "html": "

    \n", - "example": 612, - "start_line": 9006, - "end_line": 9010, + "example": 613, + "start_line": 9016, + "end_line": 9020, "section": "Raw HTML" }, { "markdown": "\n", "html": "

    \n", - "example": 613, - "start_line": 9015, - "end_line": 9019, + "example": 614, + "start_line": 9025, + "end_line": 9029, "section": "Raw HTML" }, { "markdown": "\n", "html": "

    \n", - "example": 614, - "start_line": 9024, - "end_line": 9030, + "example": 615, + "start_line": 9034, + "end_line": 9040, "section": "Raw HTML" }, { "markdown": "\n", "html": "

    \n", - "example": 615, - "start_line": 9035, - "end_line": 9041, + "example": 616, + "start_line": 9045, + "end_line": 9051, "section": "Raw HTML" }, { "markdown": "Foo \n", "html": "

    Foo

    \n", - "example": 616, - "start_line": 9046, - "end_line": 9050, + "example": 617, + "start_line": 9056, + "end_line": 9060, "section": "Raw HTML" }, { "markdown": "<33> <__>\n", "html": "

    <33> <__>

    \n", - "example": 617, - "start_line": 9055, - "end_line": 9059, + "example": 618, + "start_line": 9065, + "end_line": 9069, "section": "Raw HTML" }, { "markdown": "
    \n", "html": "

    <a h*#ref="hi">

    \n", - "example": 618, - "start_line": 9064, - "end_line": 9068, + "example": 619, + "start_line": 9074, + "end_line": 9078, "section": "Raw HTML" }, { "markdown": "
    \n", "html": "

    <a href="hi'> <a href=hi'>

    \n", - "example": 619, - "start_line": 9073, - "end_line": 9077, + "example": 620, + "start_line": 9083, + "end_line": 9087, "section": "Raw HTML" }, { "markdown": "< a><\nfoo>\n\n", "html": "

    < a><\nfoo><bar/ >\n<foo bar=baz\nbim!bop />

    \n", - "example": 620, - "start_line": 9082, - "end_line": 9092, + "example": 621, + "start_line": 9092, + "end_line": 9102, "section": "Raw HTML" }, { "markdown": "
    \n", "html": "

    <a href='https://github.com/bar'title=title>

    \n", - "example": 621, - "start_line": 9097, - "end_line": 9101, + "example": 622, + "start_line": 9107, + "end_line": 9111, "section": "Raw HTML" }, { "markdown": "
    \n", "html": "

    \n", - "example": 622, - "start_line": 9106, - "end_line": 9110, + "example": 623, + "start_line": 9116, + "end_line": 9120, "section": "Raw HTML" }, { "markdown": "\n", "html": "

    </a href="foo">

    \n", - "example": 623, - "start_line": 9115, - "end_line": 9119, - "section": "Raw HTML" - }, - { - "markdown": "foo \n", - "html": "

    foo

    \n", "example": 624, - "start_line": 9124, - "end_line": 9130, + "start_line": 9125, + "end_line": 9129, "section": "Raw HTML" }, { - "markdown": "foo \n", - "html": "

    foo <!-- not a comment -- two hyphens -->

    \n", + "markdown": "foo \n", + "html": "

    foo

    \n", "example": 625, - "start_line": 9133, - "end_line": 9137, + "start_line": 9134, + "end_line": 9140, "section": "Raw HTML" }, { - "markdown": "foo foo -->\n\nfoo \n", - "html": "

    foo <!--> foo -->

    \n

    foo <!-- foo--->

    \n", + "markdown": "foo foo -->\n\nfoo foo -->\n", + "html": "

    foo foo -->

    \n

    foo foo -->

    \n", "example": 626, "start_line": 9142, "end_line": 9149, diff --git a/tests/test_cmark_spec/get_cmark_spec.py b/tests/test_cmark_spec/get_cmark_spec.py index 851cad75..d59364f0 100644 --- a/tests/test_cmark_spec/get_cmark_spec.py +++ b/tests/test_cmark_spec/get_cmark_spec.py @@ -4,10 +4,12 @@ # ] # /// from pathlib import Path +from typing import Any -default_version = "0.30" -default_output_path = Path(__file__).parent / "commonmark.json" -default_fixture_test_path = ( +default_version = "0.31.2" +default_json_path = Path(__file__).parent / "commonmark.json" +default_text_path = Path(__file__).parent / "spec.md" +default_fixture_path = ( Path(__file__).parent.parent / "test_port" / "fixtures" / "commonmark_spec.md" ) @@ -23,55 +25,72 @@ def create_argparser(): help=f"CommonMark spec version to download (default: {default_version})", ) parser.add_argument( - "--output", - "-o", + "--output-json", type=Path, - default=default_output_path, - help=f"Output file path (default: {default_output_path})", + default=default_json_path, + help=f"Output file path (default: {default_json_path})", ) parser.add_argument( - "--test-fixture", + "--output-text", type=Path, - default=default_fixture_test_path, - help=f"Write to test fixture (default: {default_fixture_test_path})", + default=default_text_path, + help=f"Output file path (default: {default_text_path})", + ) + parser.add_argument( + "--output-fixture", + type=Path, + default=default_fixture_path, + help=f"Write to test fixture (default: {default_fixture_path})", ) return parser +def _json_to_fixture(data: list[dict[str, Any]]) -> str: + text = "" + for item in data: + text += "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + text += f"src line: {item['start_line'] - 1}\n\n" + text += f".\n{item['markdown']}.\n{item['html']}.\n\n" + return text + + if __name__ == "__main__": import requests # type: ignore[import-untyped] args = create_argparser().parse_args() version: str = args.version - output_path: Path = args.output - write_to_test_fixture = True - test_fixture: Path = args.test_fixture + json_path: Path = args.output_json + txt_path: Path = args.output_text + test_fixture: Path = args.output_fixture + changed = False - url = f"https://spec.commonmark.org/%7Bversion%7D/spec.json" - print(f"Downloading CommonMark spec from {url}") - response = requests.get(url) - response.raise_for_status() - if not output_path.exists() or output_path.read_text() != response.text: - changed = True - with output_path.open("w") as f: - f.write(response.text) - print(f"Updated to {output_path}") - else: - print(f"Spec file {output_path} is up to date, not overwriting") - if write_to_test_fixture: - data = response.json() - text = "" - for item in data: - text += "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" - text += f"src line: {item['start_line'] - 1}\n\n" - text += f".\n{item['markdown']}.\n{item['html']}.\n\n" - if not test_fixture.exists() or test_fixture.read_text() != text: + json_url = f"https://spec.commonmark.org/%7Bversion%7D/spec.json" + txt_url = f"https://raw.githubusercontent.com/commonmark/commonmark-spec/refs/tags/%7Bversion%7D/spec.txt" + + for url, output_path in ((json_url, json_path), (txt_url, txt_path)): + print(f"Downloading CommonMark spec from {url}") + response = requests.get(url) + response.raise_for_status() + if not output_path.exists() or output_path.read_text() != response.text: changed = True - with test_fixture.open("w") as f: - f.write(text) - print(f"Also updated to {test_fixture}") + with 
output_path.open("w") as f: + f.write(response.text) + print(f"Updated to {output_path}") else: - print(f"Fixture file {test_fixture} is up to date, not overwriting") + print(f"File {output_path} is up to date, not overwriting") + + # write_to_test_fixture: + response = requests.get(json_url) + response.raise_for_status() + data = response.json() + text = _json_to_fixture(data) + if not test_fixture.exists() or test_fixture.read_text() != text: + changed = True + with test_fixture.open("w") as f: + f.write(text) + print(f"Also updated to {test_fixture}") + else: + print(f"Fixture file {test_fixture} is up to date, not overwriting") raise SystemExit(0 if not changed else 1) diff --git a/tests/test_cmark_spec/spec.md b/tests/test_cmark_spec/spec.md index 2d79f7b7..f1fab281 100644 --- a/tests/test_cmark_spec/spec.md +++ b/tests/test_cmark_spec/spec.md @@ -1,9 +1,9 @@ --- title: CommonMark Spec author: John MacFarlane -version: 0.30 -date: '2021-06-19' -license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)' +version: '0.31.2' +date: '2024-01-28' +license: '[CC-BY-SA 4.0](https://creativecommons.org/licenses/by-sa/4.0/)' ... # Introduction @@ -14,7 +14,7 @@ Markdown is a plain text format for writing structured documents, based on conventions for indicating formatting in email and usenet posts. It was developed by John Gruber (with help from Aaron Swartz) and released in 2004 in the form of a -[syntax description](http://daringfireball.net/projects/markdown/syntax) +[syntax description](https://daringfireball.net/projects/markdown/syntax) and a Perl script (`Markdown.pl`) for converting Markdown to HTML. In the next decade, dozens of implementations were developed in many languages. Some extended the original @@ -34,10 +34,10 @@ As Gruber writes: > Markdown-formatted document should be publishable as-is, as > plain text, without looking like it's been marked up with tags > or formatting instructions. -> () +> () The point can be illustrated by comparing a sample of -[AsciiDoc](http://www.methods.co.nz/asciidoc/) with +[AsciiDoc](https://asciidoc.org/) with an equivalent sample of Markdown. Here is a sample of AsciiDoc from the AsciiDoc manual: @@ -103,7 +103,7 @@ source, not just in the processed document. ## Why is a spec needed? John Gruber's [canonical description of Markdown's -syntax](http://daringfireball.net/projects/markdown/syntax) +syntax](https://daringfireball.net/projects/markdown/syntax) does not specify the syntax unambiguously. Here are some examples of questions it does not answer: @@ -114,7 +114,7 @@ questions it does not answer: not require that. This is hardly a "corner case," and divergences between implementations on this issue often lead to surprises for users in real documents. (See [this comment by John - Gruber](http://article.gmane.org/gmane.text.markdown.general/1997).) + Gruber](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/1997).) 2. Is a blank line needed before a block quote or heading? Most implementations do not require the blank line. However, @@ -122,7 +122,7 @@ questions it does not answer: also to ambiguities in parsing (note that some implementations put the heading inside the blockquote, while others do not). (John Gruber has also spoken [in favor of requiring the blank - lines](http://article.gmane.org/gmane.text.markdown.general/2146).) + lines](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/2146).) 3. 
Is a blank line needed before an indented code block? (`Markdown.pl` requires it, but this is not mentioned in the @@ -155,7 +155,7 @@ questions it does not answer: ``` (There are some relevant comments by John Gruber - [here](http://article.gmane.org/gmane.text.markdown.general/2554).) + [here](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/2554).) 5. Can list markers be indented? Can ordered list markers be right-aligned? @@ -316,9 +316,9 @@ A line containing no characters, or a line containing only spaces The following definitions of character classes will be used in this spec: -A [Unicode whitespace character](@) is -any code point in the Unicode `Zs` general category, or a tab (`U+0009`), -line feed (`U+000A`), form feed (`U+000C`), or carriage return (`U+000D`). +A [Unicode whitespace character](@) is a character in the Unicode `Zs` general +category, or a tab (`U+0009`), line feed (`U+000A`), form feed (`U+000C`), or +carriage return (`U+000D`). [Unicode whitespace](@) is a sequence of one or more [Unicode whitespace characters]. @@ -337,9 +337,8 @@ is `!`, `"`, `#`, `$`, `%`, `&`, `'`, `(`, `)`, `[`, `\`, `]`, `^`, `_`, `` ` `` (U+005B–0060), `{`, `|`, `}`, or `~` (U+007B–007E). -A [Unicode punctuation character](@) is an [ASCII -punctuation character] or anything in -the general Unicode categories `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`. +A [Unicode punctuation character](@) is a character in the Unicode `P` +(puncuation) or `S` (symbol) general categories. ## Tabs @@ -579,9 +578,9 @@ raw HTML: ```````````````````````````````` example - + . -

    http://example.com?find=\*

    +

    https://example.com?find=\*

    ```````````````````````````````` @@ -1330,10 +1329,7 @@ interpretable as a [code fence], [ATX heading][ATX headings], A [setext heading underline](@) is a sequence of `=` characters or a sequence of `-` characters, with no more than 3 -spaces of indentation and any number of trailing spaces or tabs. If a line -containing a single `-` can be interpreted as an -empty [list items], it should be interpreted this way -and not as a [setext heading underline]. +spaces of indentation and any number of trailing spaces or tabs. The heading is a level 1 heading if `=` characters are used in the [setext heading underline], and a level 2 heading if `-` @@ -1967,7 +1963,7 @@ has been found, the code block contains all of the lines after the opening code fence until the end of the containing block (or document). (An alternative spec would require backtracking in the event that a closing code fence is not found. But this makes parsing -much less efficient, and there seems to be no real down side to the +much less efficient, and there seems to be no real downside to the behavior described here.) A fenced code block may interrupt a paragraph, and does not require @@ -2397,7 +2393,7 @@ followed by an ASCII letter.\ ``. -6. **Start condition:** line begins the string `<` or ``, or the string `/>`.\ @@ -4118,7 +4114,7 @@ The following rules define [list items]: blocks *Bs* starting with a character other than a space or tab, and *M* is a list marker of width *W* followed by 1 ≤ *N* ≤ 4 spaces of indentation, then the result of prepending *M* and the following spaces to the first line - of Ls*, and indenting subsequent lines of *Ls* by *W + N* spaces, is a + of *Ls*, and indenting subsequent lines of *Ls* by *W + N* spaces, is a list item with *Bs* as its contents. The type of the list item (bullet or ordered) is determined by the type of its list marker. If the list item is ordered, then it is also assigned a start @@ -5353,11 +5349,11 @@ by itself should be a paragraph followed by a nested sublist. Since it is well established Markdown practice to allow lists to interrupt paragraphs inside list items, the [principle of uniformity] requires us to allow this outside list items as -well. ([reStructuredText](http://docutils.sourceforge.net/rst.html) +well. ([reStructuredText](https://docutils.sourceforge.net/rst.html) takes a different approach, requiring blank lines before lists even inside other list items.) -In order to solve of unwanted lists in paragraphs with +In order to solve the problem of unwanted lists in paragraphs with hard-wrapped numerals, we allow only lists starting with `1` to interrupt paragraphs. Thus, @@ -6058,18 +6054,18 @@ But this is an HTML tag: And this is code: ```````````````````````````````` example -`` +`` . -

    <http://foo.bar.baz>`

    +

    <https://foo.bar.baz>`

    ```````````````````````````````` But this is an autolink: ```````````````````````````````` example -` +` . -

    http://foo.bar.`baz`

    +

    https://foo.bar.`baz`

    ```````````````````````````````` @@ -6102,7 +6098,7 @@ closing backtick strings to be equal in length: ## Emphasis and strong emphasis John Gruber's original [Markdown syntax -description](http://daringfireball.net/projects/markdown/syntax#em) says: +description](https://daringfireball.net/projects/markdown/syntax#em) says: > Markdown treats asterisks (`*`) and underscores (`_`) as indicators of > emphasis. Text wrapped with one `*` or `_` will be wrapped with an HTML @@ -6204,7 +6200,7 @@ Here are some examples of delimiter runs. (The idea of distinguishing left-flanking and right-flanking delimiter runs based on the character before and the character after comes from Roopesh Chander's -[vfmd](http://www.vfmd.org/vfmd-spec/specification/#procedure-for-identifying-emphasis-tags). +[vfmd](https://web.archive.org/web/20220608143320/http://www.vfmd.org/vfmd-spec/specification/#procedure-for-identifying-emphasis-tags). vfmd uses the terminology "emphasis indicator string" instead of "delimiter run," and its rules for distinguishing left- and right-flanking runs are a bit more complex than the ones given here.) @@ -6346,6 +6342,21 @@ Unicode nonbreaking spaces count as whitespace, too: ```````````````````````````````` +Unicode symbols count as punctuation, too: + +```````````````````````````````` example +*$*alpha. + +*£*bravo. + +*€*charlie. +. +

    *$*alpha.

    +

    *£*bravo.

    +

    *€*charlie.

    +```````````````````````````````` + + Intraword emphasis with `*` is permitted: ```````````````````````````````` example @@ -7431,16 +7442,16 @@ _a `_`_ ```````````````````````````````` example -**a +**a . -

    **ahttp://foo.bar/?q=**

    +

    **ahttps://foo.bar/?q=**

    ```````````````````````````````` ```````````````````````````````` example -__a +__a . -

    __ahttp://foo.bar/?q=__

    +

    __ahttps://foo.bar/?q=__

    ```````````````````````````````` @@ -7688,13 +7699,13 @@ A link can contain fragment identifiers and queries: ```````````````````````````````` example [link](#fragment) -[link](http://example.com#fragment) +[link](https://example.com#fragment) -[link](http://example.com?foo=3#frag) +[link](https://example.com?foo=3#frag) .

    link

    -

    link

    -

    link

    +

    link

    +

    link

    ```````````````````````````````` @@ -7938,9 +7949,9 @@ and autolinks over link grouping: ```````````````````````````````` example -[foo +[foo . -

    [foohttp://example.com/?search=](uri)

    +

    [foohttps://example.com/?search=](uri)

    ```````````````````````````````` @@ -8094,11 +8105,11 @@ and autolinks over link grouping: ```````````````````````````````` example -[foo +[foo [ref]: /uri . -

    [foohttp://example.com/?search=][ref]

    +

    [foohttps://example.com/?search=][ref]

    ```````````````````````````````` @@ -8298,7 +8309,7 @@ A [collapsed reference link](@) consists of a [link label] that [matches] a [link reference definition] elsewhere in the document, followed by the string `[]`. -The contents of the first link label are parsed as inlines, +The contents of the link label are parsed as inlines, which are used as the link's text. The link's URI and title are provided by the matching reference link definition. Thus, `[foo][]` is equivalent to `[foo][foo]`. @@ -8351,7 +8362,7 @@ A [shortcut reference link](@) consists of a [link label] that [matches] a [link reference definition] elsewhere in the document and is not followed by `[]` or a link label. -The contents of the first link label are parsed as inlines, +The contents of the link label are parsed as inlines, which are used as the link's text. The link's URI and title are provided by the matching link reference definition. Thus, `[foo]` is equivalent to `[foo][]`. @@ -8438,7 +8449,7 @@ following closing bracket: ```````````````````````````````` -Full and compact references take precedence over shortcut +Full and collapsed references take precedence over shortcut references: ```````````````````````````````` example @@ -8754,7 +8765,7 @@ a link to the URI, with the URI as the link's label. An [absolute URI](@), for these purposes, consists of a [scheme] followed by a colon (`:`) -followed by zero or more characters other [ASCII control +followed by zero or more characters other than [ASCII control characters][ASCII control character], [space], `<`, and `>`. If the URI includes these characters, they must be percent-encoded (e.g. `%20` for a space). @@ -8774,9 +8785,9 @@ Here are some valid autolinks: ```````````````````````````````` example - + . -

    http://foo.bar.baz/test?q=hello&id=22&boolean

    +

    https://foo.bar.baz/test?q=hello&id=22&boolean

    ```````````````````````````````` @@ -8816,9 +8827,9 @@ with their syntax: ```````````````````````````````` example - + . -

    http://../

    +

    https://../

    ```````````````````````````````` @@ -8832,18 +8843,18 @@ with their syntax: Spaces are not allowed in autolinks: ```````````````````````````````` example - + . -

    <http://foo.bar/baz bim>

    +

    <https://foo.bar/baz bim>

    ```````````````````````````````` Backslash-escapes do not work inside autolinks: ```````````````````````````````` example - + . -

    http://example.com/\[\

    +

    https://example.com/\[\

    ```````````````````````````````` @@ -8895,9 +8906,9 @@ These are not autolinks: ```````````````````````````````` example -< http://foo.bar > +< https://foo.bar > . -

    < http://foo.bar >

    +

    < https://foo.bar >

    ```````````````````````````````` @@ -8916,9 +8927,9 @@ These are not autolinks: ```````````````````````````````` example -http://example.com +https://example.com . -

    http://example.com

    +

    https://example.com

    ```````````````````````````````` @@ -8980,10 +8991,9 @@ A [closing tag](@) consists of the string ``. -An [HTML comment](@) consists of ``, -where *text* does not start with `>` or `->`, does not end with `-`, -and does not contain `--`. (See the -[HTML5 spec](http://www.w3.org/TR/html5/syntax.html#comments).) +An [HTML comment](@) consists of ``, ``, or ``, and `-->` (see the +[HTML spec](https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state)). A [processing instruction](@) consists of the string ` +foo . -

    foo

    +

    foo

    ```````````````````````````````` - -```````````````````````````````` example -foo -. -

    foo <!-- not a comment -- two hyphens -->

    -```````````````````````````````` - - -Not comments: - ```````````````````````````````` example foo foo --> -foo +foo foo --> . -

    foo <!--> foo -->

    -

    foo <!-- foo--->

    +

    foo foo -->

    +

    foo foo -->

    ```````````````````````````````` @@ -9674,7 +9674,7 @@ through the stack for an opening `[` or `![` delimiter. delimiter from the stack, and return a literal text node `]`. - If we find one and it's active, then we parse ahead to see if - we have an inline link/image, reference link/image, compact reference + we have an inline link/image, reference link/image, collapsed reference link/image, or shortcut reference link/image. + If we don't, then we remove the opening delimiter from the diff --git a/tests/test_cmark_spec/test_spec/test_file.html b/tests/test_cmark_spec/test_spec/test_file.html index 1c2dc3cb..60873533 100644 --- a/tests/test_cmark_spec/test_spec/test_file.html +++ b/tests/test_cmark_spec/test_spec/test_file.html @@ -1,9 +1,9 @@

    title: CommonMark Spec author: John MacFarlane -version: 0.30 -date: '2021-06-19' -license: 'CC-BY-SA 4.0' +version: '0.31.2' +date: '2024-01-28' +license: 'CC-BY-SA 4.0' ...

    Introduction

    What is Markdown?

    @@ -11,7 +11,7 @@

    What is Markdown?

    based on conventions for indicating formatting in email and usenet posts. It was developed by John Gruber (with help from Aaron Swartz) and released in 2004 in the form of a -syntax description +syntax description and a Perl script (Markdown.pl) for converting Markdown to HTML. In the next decade, dozens of implementations were developed in many languages. Some extended the original @@ -30,10 +30,10 @@

    What is Markdown?

    Markdown-formatted document should be publishable as-is, as plain text, without looking like it's been marked up with tags or formatting instructions. -(http://daringfireball.net/projects/markdown/)

    +(https://daringfireball.net/projects/markdown/)

    The point can be illustrated by comparing a sample of -AsciiDoc with +AsciiDoc with an equivalent sample of Markdown. Here is a sample of AsciiDoc from the AsciiDoc manual:

    1. List item one.
    @@ -91,7 +91,7 @@ 

    What is Markdown?

    to read. The nesting of list items is apparent to the eye in the source, not just in the processed document.

    Why is a spec needed?

    -

    John Gruber's canonical description of Markdown's +

    John Gruber's canonical description of Markdown's syntax does not specify the syntax unambiguously. Here are some examples of questions it does not answer:

    @@ -103,7 +103,7 @@

    Why is a spec needed?

    they, too, must be indented four spaces, but Markdown.pl does not require that. This is hardly a "corner case," and divergences between implementations on this issue often lead to surprises for -users in real documents. (See this comment by John +users in real documents. (See this comment by John Gruber.)

  • @@ -112,7 +112,7 @@

    Why is a spec needed?

    this can lead to unexpected results in hard-wrapped text, and also to ambiguities in parsing (note that some implementations put the heading inside the blockquote, while others do not). -(John Gruber has also spoken in favor of requiring the blank +(John Gruber has also spoken in favor of requiring the blank lines.)

  • @@ -140,7 +140,7 @@

    Why is a spec needed?

    2. two
  • (There are some relevant comments by John Gruber -here.)

    +here.)

  • Can list markers be indented? Can ordered list markers be right-aligned?

    @@ -275,9 +275,9 @@

    Characters and lines

    A line containing no characters, or a line containing only spaces (U+0020) or tabs (U+0009), is called a blank line.

    The following definitions of character classes will be used in this spec:

    -

    A Unicode whitespace character is -any code point in the Unicode Zs general category, or a tab (U+0009), -line feed (U+000A), form feed (U+000C), or carriage return (U+000D).

    +

    A Unicode whitespace character is a character in the Unicode Zs general +category, or a tab (U+0009), line feed (U+000A), form feed (U+000C), or +carriage return (U+000D).

    Unicode whitespace is a sequence of one or more [Unicode whitespace characters].

    A tab is U+0009.

    @@ -290,9 +290,8 @@

    Characters and lines

    :, ;, <, =, >, ?, @ (U+003A–0040), [, \, ], ^, _, ` (U+005B–0060), {, |, }, or ~ (U+007B–007E).

    -

    A Unicode punctuation character is an [ASCII -punctuation character] or anything in -the general Unicode categories Pc, Pd, Pe, Pf, Pi, Po, or Ps.

    +

    A Unicode punctuation character is a character in the Unicode P +(puncuation) or S (symbol) general categories.

    Tabs

    Tabs in lines are not expanded to [spaces]. However, in contexts where spaces help to define block structure, @@ -467,9 +466,9 @@

    Backslash escapes

    <pre><code>\[\] </code></pre> -
    <http://example.com?find=\*>
    +
    <https://example.com?find=\*>
     .
    -<p><a href="http://example.com?find=%5C*">http://example.com?find=\*</a></p>
    +<p><a href="https://example.com?find=%5C*">https://example.com?find=\*</a></p>
     
    <a href="/bar\/)">
     .
    @@ -987,10 +986,7 @@ 

    Setext headings

    [list item][list items], or [HTML block][HTML blocks].

    A setext heading underline is a sequence of = characters or a sequence of - characters, with no more than 3 -spaces of indentation and any number of trailing spaces or tabs. If a line -containing a single - can be interpreted as an -empty [list items], it should be interpreted this way -and not as a [setext heading underline].

    +spaces of indentation and any number of trailing spaces or tabs.

    The heading is a level 1 heading if = characters are used in the [setext heading underline], and a level 2 heading if - characters are used. The contents of the heading are the result @@ -1461,7 +1457,7 @@

    Fenced code blocks

    opening code fence until the end of the containing block (or document). (An alternative spec would require backtracking in the event that a closing code fence is not found. But this makes parsing -much less efficient, and there seems to be no real down side to the +much less efficient, and there seems to be no real downside to the behavior described here.)

    A fenced code block may interrupt a paragraph, and does not require a blank line either before or after.

    @@ -1786,7 +1782,7 @@

    HTML blocks

    End condition: line contains the string ]]>.

  • -

    Start condition: line begins the string < or </ +

    Start condition: line begins with the string < or </ followed by one of the strings (case-insensitive) address, article, aside, base, basefont, blockquote, body, caption, center, col, colgroup, dd, details, dialog, @@ -1795,7 +1791,7 @@

    HTML blocks

    h1, h2, h3, h4, h5, h6, head, header, hr, html, iframe, legend, li, link, main, menu, menuitem, nav, noframes, ol, optgroup, option, p, param, -section, source, summary, table, tbody, td, +search, section, summary, table, tbody, td, tfoot, th, thead, title, tr, track, ul, followed by a space, a tab, the end of the line, the string >, or the string />.
    @@ -3085,7 +3081,7 @@

    List items

    blocks Bs starting with a character other than a space or tab, and M is a list marker of width W followed by 1 ≤ N ≤ 4 spaces of indentation, then the result of prepending M and the following spaces to the first line -of Ls*, and indenting subsequent lines of Ls by W + N spaces, is a +of Ls, and indenting subsequent lines of Ls by W + N spaces, is a list item with Bs as its contents. The type of the list item (bullet or ordered) is determined by the type of its list marker. If the list item is ordered, then it is also assigned a start @@ -4081,10 +4077,10 @@

    Lists

    Since it is well established Markdown practice to allow lists to interrupt paragraphs inside list items, the [principle of uniformity] requires us to allow this outside list items as -well. (reStructuredText +well. (reStructuredText takes a different approach, requiring blank lines before lists even inside other list items.)

    -

    In order to solve of unwanted lists in paragraphs with +

    In order to solve the problem of unwanted lists in paragraphs with hard-wrapped numerals, we allow only lists starting with 1 to interrupt paragraphs. Thus,

    The number of windows in my house is
    @@ -4631,14 +4627,14 @@ 

    Code spans

    <p><a href="`">`</p>

    And this is code:

    -
    `<http://foo.bar.`baz>`
    +
    `<https://foo.bar.`baz>`
     .
    -<p><code>&lt;http://foo.bar.</code>baz&gt;`</p>
    +<p><code>&lt;https://foo.bar.</code>baz&gt;`</p>
     

    But this is an autolink:

    -
    <http://foo.bar.`baz>`
    +
    <https://foo.bar.`baz>`
     .
    -<p><a href="http://foo.bar.%60baz">http://foo.bar.`baz</a>`</p>
    +<p><a href="https://foo.bar.%60baz">https://foo.bar.`baz</a>`</p>
     

    When a backtick string is not closed by a matching backtick string, we just have literal backticks:

    @@ -4657,7 +4653,7 @@

    Code spans

    <p>`foo<code>bar</code></p>

    Emphasis and strong emphasis

    -

    John Gruber's original Markdown syntax +

    John Gruber's original Markdown syntax description says:

    Markdown treats asterisks (*) and underscores (_) as indicators of @@ -4744,7 +4740,7 @@

    Emphasis and strong emphasis

    (The idea of distinguishing left-flanking and right-flanking delimiter runs based on the character before and the character after comes from Roopesh Chander's -vfmd. +vfmd. vfmd uses the terminology "emphasis indicator string" instead of "delimiter run," and its rules for distinguishing left- and right-flanking runs are a bit more complex than the ones given here.)

    @@ -4887,6 +4883,17 @@

    Emphasis and strong emphasis

    . <p>* a *</p>
+
+Unicode symbols count as punctuation, too:
+
+*$*alpha.
+
+*£*bravo.
+
+*€*charlie.
+.
+<p>*$*alpha.</p>
+<p>*£*bravo.</p>
+<p>*€*charlie.</p>
+

    Intraword emphasis with * is permitted:

    foo*bar*
     .
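
    A small sketch of the intraword rule (commonmark preset assumed): * may open emphasis inside a word, while _ may not.

```python
from markdown_it import MarkdownIt

md = MarkdownIt("commonmark")

# * delimiters can open and close emphasis inside a word:
print(md.render("foo*bar*"))
# Expected: <p>foo<em>bar</em></p>

# _ delimiters cannot, because of the flanking rules described above:
print(md.render("foo_bar_"))
# Expected: <p>foo_bar_</p>
```
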
    @@ -5521,13 +5528,13 @@ 

    Emphasis and strong emphasis

    . <p><em>a <code>_</code></em></p>
    -
    **a<http://foo.bar/?q=**>
    +
    **a<https://foo.bar/?q=**>
     .
    -<p>**a<a href="http://foo.bar/?q=**">http://foo.bar/?q=**</a></p>
    +<p>**a<a href="https://foo.bar/?q=**">https://foo.bar/?q=**</a></p>
     
    -
    __a<http://foo.bar/?q=__>
    +
    __a<https://foo.bar/?q=__>
     .
    -<p>__a<a href="http://foo.bar/?q=__">http://foo.bar/?q=__</a></p>
    +<p>__a<a href="https://foo.bar/?q=__">https://foo.bar/?q=__</a></p>
     

    Links

    A link contains [link text] (the visible text), a [link destination]

    @@ -5720,13 +5727,13 @@

    Links

    A link can contain fragment identifiers and queries:

    [link](#fragment)
     
    -[link](http://example.com#fragment)
    +[link](https://example.com#fragment)
     
    -[link](http://example.com?foo=3#frag)
    +[link](https://example.com?foo=3#frag)
     .
     <p><a href="#fragment">link</a></p>
    -<p><a href="http://example.com#fragment">link</a></p>
    -<p><a href="http://example.com?foo=3#frag">link</a></p>
    +<p><a href="https://example.com#fragment">link</a></p>
    +<p><a href="https://example.com?foo=3#frag">link</a></p>
     

    Note that a backslash before a non-escapable character is just a backslash:

    @@ -5878,9 +5885,9 @@

    Links

    . <p>[foo<code>](/uri)</code></p>
    -
    [foo<http://example.com/?search=](uri)>
    +
    [foo<https://example.com/?search=](uri)>
     .
    -<p>[foo<a href="http://example.com/?search=%5D(uri)">http://example.com/?search=](uri)</a></p>
    +<p>[foo<a href="https://example.com/?search=%5D(uri)">https://example.com/?search=](uri)</a></p>
     

    There are three kinds of reference links: full, collapsed,

    @@ -5985,11 +5992,11 @@

    Links

    . <p>[foo<code>][ref]</code></p>
    -
    [foo<http://example.com/?search=][ref]>
    +
    [foo<https://example.com/?search=][ref]>
     
     [ref]: /uri
     .
    -<p>[foo<a href="http://example.com/?search=%5D%5Bref%5D">http://example.com/?search=][ref]</a></p>
    +<p>[foo<a href="https://example.com/?search=%5D%5Bref%5D">https://example.com/?search=][ref]</a></p>
     

    Matching is case-insensitive:

    [foo][BaR]
    @@ -6132,7 +6139,7 @@ 

    Links

    consists of a [link label] that [matches] a [link reference definition] elsewhere in the document, followed by the string [].
-The contents of the first link label are parsed as inlines,
+The contents of the link label are parsed as inlines,
    which are used as the link's text. The link's URI and title are provided by the matching reference link definition. Thus, [foo][] is equivalent to [foo][foo].
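
    A minimal sketch of a collapsed reference link (commonmark preset assumed; the URL and title are placeholders):

```python
from markdown_it import MarkdownIt

md = MarkdownIt("commonmark")

# [foo][] looks up the definition for the label "foo":
text = '[foo][]\n\n[foo]: /url "title"'
print(md.render(text))
# Expected: <p><a href="/url" title="title">foo</a></p>
```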

    @@ -6169,7 +6176,7 @@

    Links

    consists of a [link label] that [matches] a [link reference definition] elsewhere in the document and is not followed by [] or a link label.
-The contents of the first link label are parsed as inlines,
+The contents of the link label are parsed as inlines,
    which are used as the link's text. The link's URI and title are provided by the matching link reference definition. Thus, [foo] is equivalent to [foo][].

    @@ -6227,7 +6234,7 @@

    Links

    . <p>*<a href="/url">foo*</a></p>
-    Full and compact references take precedence over shortcut

+    Full and collapsed references take precedence over shortcut
     references:

    [foo][bar]
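
    A sketch of this precedence rule (commonmark preset assumed; /url1 and /url2 are placeholder destinations):

```python
from markdown_it import MarkdownIt

md = MarkdownIt("commonmark")

# [foo][bar] is a full reference using the "bar" definition, not the
# shortcut [foo] followed by a literal [bar]:
text = "[foo][bar]\n\n[foo]: /url1\n[bar]: /url2"
print(md.render(text))
# Expected: <p><a href="/url2">foo</a></p>
```
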
     
    @@ -6438,7 +6445,7 @@ 

    Autolinks

    a link to the URI, with the URI as the link's label.

    An absolute URI, for these purposes, consists of a [scheme] followed by a colon (:)
-followed by zero or more characters other [ASCII control
+followed by zero or more characters other than [ASCII control
    characters][ASCII control character], [space], <, and >. If the URI includes these characters, they must be percent-encoded (e.g. %20 for a space).
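
    Two hedged checks of this definition with the Python port (commonmark preset assumed):

```python
from markdown_it import MarkdownIt

md = MarkdownIt("commonmark")

# Any scheme followed by ':' and allowed characters qualifies, even if
# the scheme is not a registered one:
print(md.render("<made-up-scheme://foo,bar>"))
# Expected: <p><a href="made-up-scheme://foo,bar">made-up-scheme://foo,bar</a></p>

# A space disqualifies the autolink entirely:
print(md.render("<https://foo.bar/baz bim>"))
# Expected: <p>&lt;https://foo.bar/baz bim&gt;</p>
```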

    @@ -6451,9 +6458,9 @@

    Autolinks

    . <p><a href="http://foo.bar.baz">http://foo.bar.baz</a></p>
    -
    <http://foo.bar.baz/test?q=hello&id=22&boolean>
    +
    <https://foo.bar.baz/test?q=hello&id=22&boolean>
     .
    -<p><a href="http://foo.bar.baz/test?q=hello&amp;id=22&amp;boolean">http://foo.bar.baz/test?q=hello&amp;id=22&amp;boolean</a></p>
    +<p><a href="https://foo.bar.baz/test?q=hello&amp;id=22&amp;boolean">https://foo.bar.baz/test?q=hello&amp;id=22&amp;boolean</a></p>
     
    <irc://foo.bar:2233/baz>
     .
    @@ -6476,23 +6483,23 @@ 

    Autolinks

    . <p><a href="made-up-scheme://foo,bar">made-up-scheme://foo,bar</a></p>
    -
    <http://../>
    +
    <https://../>
     .
    -<p><a href="http://../">http://../</a></p>
    +<p><a href="https://../">https://../</a></p>
     
    <localhost:5001/foo>
     .
     <p><a href="localhost:5001/foo">localhost:5001/foo</a></p>
     

    Spaces are not allowed in autolinks:

    -
    <http://foo.bar/baz bim>
    +
    <https://foo.bar/baz bim>
     .
    -<p>&lt;http://foo.bar/baz bim&gt;</p>
    +<p>&lt;https://foo.bar/baz bim&gt;</p>
     

    Backslash-escapes do not work inside autolinks:

    -
    <http://example.com/\[\>
    +
    <https://example.com/\[\>
     .
    -<p><a href="http://example.com/%5C%5B%5C">http://example.com/\[\</a></p>
    +<p><a href="https://example.com/%5C%5B%5C">https://example.com/\[\</a></p>
     

    An email autolink consists of <, followed by an [email address],

    @@ -6524,9 +6531,9 @@

    Autolinks

    . <p>&lt;&gt;</p>
    -
    < http://foo.bar >
    +
    < https://foo.bar >
     .
    -<p>&lt; http://foo.bar &gt;</p>
    +<p>&lt; https://foo.bar &gt;</p>
     
    <m:abc>
     .
    @@ -6536,9 +6543,9 @@ 

    Autolinks

    . <p>&lt;foo.bar.baz&gt;</p>
    -
    http://example.com
    +
    https://example.com
     .
    -<p>http://example.com</p>
    +<p>https://example.com</p>
     
    foo@bar.example.com
     .
    @@ -6582,10 +6589,9 @@ 

    Raw HTML

    A closing tag consists of the string </, a [tag name], optional spaces, tabs, and up to one line ending, and the character >.

-    An HTML comment consists of <!-- + text + -->,
-where text does not start with > or ->, does not end with -,
-and does not contain --. (See the
-HTML5 spec.)

+    An HTML comment consists of <!-->, <!--->, or <!--, a string of
+characters not including the string -->, and --> (see the
+HTML spec).
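
    A cautious sketch (commonmark preset assumed, which enables raw HTML). Whether a comment containing -- is accepted depends on which spec revision the installed parser follows:

```python
from markdown_it import MarkdownIt

md = MarkdownIt("commonmark")  # raw HTML is enabled by this preset

# A well-formed comment is passed through as raw inline HTML under both
# the old and the new wording:
print(md.render("foo <!-- this is a\ncomment - with hyphen -->"))
# Expected: <p>foo <!-- this is a
# comment - with hyphen --></p>

# Acceptance of "--" inside a comment differs between spec revisions,
# so the output here depends on the installed parser version:
print(md.render("foo <!-- not a comment -- two hyphens -->"))
```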

    A processing instruction consists of the string <?, a string of characters not including the string ?>, and the string

    @@ -6669,23 +6675,18 @@

    Raw HTML

    <p>&lt;/a href=&quot;foo&quot;&gt;</p>

    Comments:

    -
    foo <!-- this is a
    -comment - with hyphen -->
    -.
    -<p>foo <!-- this is a
    -comment - with hyphen --></p>
    -
    -
    foo <!-- not a comment -- two hyphens -->
    +
    foo <!-- this is a --
    +comment - with hyphens -->
     .
    -<p>foo &lt;!-- not a comment -- two hyphens --&gt;</p>
    +<p>foo <!-- this is a --
    +comment - with hyphens --></p>
     
    -

    Not comments:

    foo <!--> foo -->
     
    -foo <!-- foo--->
    +foo <!---> foo -->
     .
    -<p>foo &lt;!--&gt; foo --&gt;</p>
    -<p>foo &lt;!-- foo---&gt;</p>
    +<p>foo <!--> foo --&gt;</p>
    +<p>foo <!---> foo --&gt;</p>
     

    Processing instructions:

    foo <?php echo $a; ?>
    @@ -7069,7 +7070,7 @@ 

    look for link or image

  • If we find one and it's active, then we parse ahead to see if
-we have an inline link/image, reference link/image, compact reference
+we have an inline link/image, reference link/image, collapsed reference
    link/image, or shortcut reference link/image.
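
    For reference, the four shapes that this look-ahead distinguishes can be rendered side by side (a sketch, commonmark preset assumed; the labels and URLs are placeholders):

```python
from markdown_it import MarkdownIt

md = MarkdownIt("commonmark")

defs = "\n\n[foo]: /url\n[bar]: /url2"
for form in (
    "[foo](/inline)",  # inline link
    "[foo][bar]",      # full reference
    "[foo][]",         # collapsed reference
    "[foo]",           # shortcut reference
):
    print(md.render(form + defs), end="")
```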