diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 77ffff89..e46c0847 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -7,7 +7,7 @@ on: push: branches: [master] tags: - - 'v*' + - "v[0-9]+.[0-9]+.[0-9]+*" pull_request: schedule: - cron: '0 0 * * 0' # every week @@ -31,7 +31,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['pypy-3.6', '3.6', '3.7', '3.8', '3.9', '3.10'] + python-version: ['pypy-3.7', '3.7', '3.8', '3.9', '3.10'] steps: - uses: actions/checkout@v2 @@ -113,16 +113,24 @@ jobs: steps: - name: Checkout source uses: actions/checkout@v2 - - name: Set up Python 3.7 + - name: Set up Python 3.8 uses: actions/setup-python@v2 with: - python-version: '3.7' - - name: Build package + python-version: '3.8' + - name: install flit run: | - pip install build - python -m build - - name: Publish - uses: pypa/gh-action-pypi-publish@v1.1.0 - with: - user: __token__ - password: ${{ secrets.PYPI_KEY }} + pip install flit~=3.4 + - name: Build and publish + run: | + flit publish + env: + FLIT_USERNAME: __token__ + FLIT_PASSWORD: ${{ secrets.PYPI_KEY }} + + allgood: + runs-on: ubuntu-latest + needs: + - pre-commit + - tests + steps: + - run: echo "Great success!" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index da6ae0ac..b9b3f0c0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,40 +16,31 @@ exclude: > repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.4.0 + rev: v4.2.0 hooks: - id: check-json - id: check-yaml - id: end-of-file-fixer - id: trailing-whitespace - - repo: https://github.com/mgedmin/check-manifest - rev: "0.46" + - repo: https://github.com/pycqa/isort + rev: 5.10.1 hooks: - - id: check-manifest - args: [--no-build-isolation] - additional_dependencies: [setuptools>=46.4.0] - - # this is not used for now, - # since it converts markdown-it-py to markdown_it_py and removes comments - # - repo: https://github.com/asottile/setup-cfg-fmt - # rev: v1.17.0 - # hooks: - # - id: setup-cfg-fmt + - id: isort - repo: https://github.com/psf/black - rev: 20.8b1 + rev: 22.3.0 hooks: - id: black - repo: https://gitlab.com/pycqa/flake8 - rev: 3.9.1 + rev: 3.9.2 hooks: - id: flake8 additional_dependencies: [flake8-bugbear==21.3.1] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.812 + rev: v0.942 hooks: - id: mypy additional_dependencies: [attrs] diff --git a/CHANGELOG.md b/CHANGELOG.md index 1e463bab..1aa4a725 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,19 @@ # Change Log +## 2.1.0 - 2022-04-15 + +This release is primarily to replace the `attrs` package dependency, +with the built-in Python `dataclasses` package. + +This should not be a breaking change, for most use cases. + +- ⬆️ UPGRADE: Drop support for EOL Python 3.6 (#194) +- ♻️ REFACTOR: Move `Rule`/`Delimiter` classes from `attrs` to `dataclass` (#211) +- ♻️ REFACTOR: Move `Token` class from `attrs` to `dataclass` (#211) +- ‼️ Remove deprecated `NestedTokens` and `nest_tokens` +- ✨ NEW: Save ordered list numbering (#192) +- 🐛 FIX: Combination of blockquotes, list and newlines causes `IndexError` (#207) + ## 2.0.1 - 2022-24-01 - 🐛 FIX: Crash when file ends with empty blockquote line. diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 99f9f382..00000000 --- a/MANIFEST.in +++ /dev/null @@ -1,23 +0,0 @@ -exclude docs -recursive-exclude docs * -exclude tests -recursive-exclude tests * -exclude benchmarking -recursive-exclude benchmarking * - -exclude .pre-commit-config.yaml -exclude .readthedocs.yml -exclude tox.ini -exclude docstring.fmt.mustache -exclude .flake8 -exclude .circleci -exclude .circleci/config.yml -exclude codecov.yml -exclude .mypy.ini - -include LICENSE -include LICENSE.markdown-it -include CHANGELOG.md - -include markdown_it/py.typed -include markdown_it/port.yaml diff --git a/benchmarking/bench_core.py b/benchmarking/bench_core.py index d8808d59..6834989f 100644 --- a/benchmarking/bench_core.py +++ b/benchmarking/bench_core.py @@ -1,4 +1,5 @@ from pathlib import Path + import pytest import markdown_it diff --git a/benchmarking/bench_packages.py b/benchmarking/bench_packages.py index afa39ead..1158750e 100644 --- a/benchmarking/bench_packages.py +++ b/benchmarking/bench_packages.py @@ -1,7 +1,8 @@ from pathlib import Path -import pytest from shutil import which +import pytest + @pytest.fixture def spec_text(): diff --git a/docs/_static/custom.css b/docs/_static/custom.css new file mode 100644 index 00000000..9a16010b --- /dev/null +++ b/docs/_static/custom.css @@ -0,0 +1,5 @@ +.code-cell > .highlight > pre { + border-left-color: green; + border-left-width: medium; + border-left-style: solid; +} diff --git a/docs/architecture.md b/docs/architecture.md index bfc49c27..bebcf9dc 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -2,6 +2,7 @@ # markdown-it design principles +(md/data-flow)= ## Data flow Input data is parsed via nested chains of rules. There are 3 nested chains - @@ -157,10 +158,9 @@ renderer override, but can be more simple. You also can write your own renderer to generate other formats than HTML, such as JSON/XML... You can even use it to generate AST. - ## Summary -This was mentioned in [Data flow](#data-flow), but let's repeat sequence again: +This was mentioned in [Data flow](md/data-flow), but let's repeat sequence again: 1. Blocks are parsed, and top level of token stream filled with block tokens. 2. Content on inline containers is parsed, filling `.children` properties. diff --git a/docs/conf.py b/docs/conf.py index 1040838e..786eff04 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -33,25 +33,25 @@ "sphinx.ext.autodoc", "sphinx.ext.viewcode", "sphinx.ext.intersphinx", - "myst_nb", + "myst_parser", "sphinx_copybutton", - "sphinx_panels", + "sphinx_design", ] -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] +nitpicky = True nitpick_ignore = [ ("py:class", "Match"), + ("py:class", "Path"), ("py:class", "x in the interval [0, 1)."), ("py:class", "markdown_it.helpers.parse_link_destination._Result"), ("py:class", "markdown_it.helpers.parse_link_title._Result"), ("py:class", "MarkdownIt"), + ("py:class", "RuleFunc"), ("py:class", "_NodeType"), ("py:class", "typing_extensions.Protocol"), ] @@ -70,7 +70,8 @@ "repository_branch": "master", "path_to_docs": "docs", } -panels_add_boostrap_css = False +html_static_path = ["_static"] +html_css_files = ["custom.css"] # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, @@ -91,6 +92,7 @@ def run_apidoc(app): """ import os import shutil + import sphinx from sphinx.ext import apidoc @@ -100,7 +102,7 @@ def run_apidoc(app): this_folder = os.path.abspath(os.path.dirname(os.path.realpath(__file__))) api_folder = os.path.join(this_folder, "api") module_path = os.path.normpath(os.path.join(this_folder, "../")) - ignore_paths = ["../setup.py", "../conftest.py", "../tests", "../benchmarking"] + ignore_paths = ["../profiler.py", "../conftest.py", "../tests", "../benchmarking"] ignore_paths = [ os.path.normpath(os.path.join(this_folder, p)) for p in ignore_paths ] @@ -120,6 +122,7 @@ def run_apidoc(app): argv = ["-M", "--separate", "-o", api_folder, module_path] + ignore_paths + apidoc.OPTIONS.append("ignore-module-all") apidoc.main(argv) # we don't use this @@ -131,3 +134,17 @@ def setup(app): """Add functions to the Sphinx setup.""" if os.environ.get("SKIP_APIDOC", None) is None: app.connect("builder-inited", run_apidoc) + + from sphinx.directives.code import CodeBlock + + class CodeCell(CodeBlock): + """Custom code block directive.""" + + def run(self): + """Run the directive.""" + self.options["class"] = ["code-cell"] + return super().run() + + # note, these could be run by myst-nb, + # but currently this causes a circular dependency issue + app.add_directive("code-cell", CodeCell) diff --git a/docs/contributing.md b/docs/contributing.md index 9469d0ee..6c43e0e0 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -29,7 +29,6 @@ It can be setup by: Editors like VS Code also have automatic code reformat utilities, which can adhere to this standard. All functions and class methods should be annotated with types and include a docstring. -The prefered docstring format is outlined in `markdown-it-py/docstring.fmt.mustache` and can be used automatically with the [autodocstring](https://marketplace.visualstudio.com/items?itemName=njpwerner.autodocstring) VS Code extension. ## Testing diff --git a/docs/other.md b/docs/other.md index 8db5a4a4..4d77360f 100644 --- a/docs/other.md +++ b/docs/other.md @@ -38,24 +38,22 @@ So, if you decide to use plugins that add extended class syntax or autogeneratin # Performance -markdown-it-py is the fastest _**CommonMark compliant**_ parser written in python! - You can view our continuous integration benchmarking analysis at: , or you can run it for yourself within the repository: ```console $ tox -e py38-bench-packages -- --benchmark-columns mean,stddev -Name (time in ms) Mean StdDev ------------------------------------------------------------------ -test_mistune 82.0024 (1.0) 10.7779 (1.61) -test_markdown_it_py 190.9571 (2.33) 6.6946 (1.0) -test_mistletoe 247.1633 (3.01) 16.3956 (2.45) -test_commonmark_py 482.6411 (5.89) 67.8219 (10.13) -test_panflute 1,043.0018 (12.72) 229.1034 (34.22) -test_pymarkdown 964.6831 (11.76) 77.2787 (11.54) -test_pymarkdown_extra 1,051.8680 (12.83) 32.2971 (4.82) ------------------------------------------------------------------ +Name (time in ms) Mean StdDev +--------------------------------------------------------------- +test_mistune 70.3272 (1.0) 0.7978 (1.0) +test_mistletoe 116.0919 (1.65) 6.2870 (7.88) +test_markdown_it_py 152.9022 (2.17) 4.2988 (5.39) +test_commonmark_py 326.9506 (4.65) 15.8084 (19.81) +test_pymarkdown 368.2712 (5.24) 7.5906 (9.51) +test_pymarkdown_extra 640.4913 (9.11) 15.1769 (19.02) +test_panflute 678.3547 (9.65) 9.4622 (11.86) +--------------------------------------------------------------- ``` As you can see, `markdown-it-py` doesn't pay with speed for it's flexibility. @@ -63,6 +61,6 @@ As you can see, `markdown-it-py` doesn't pay with speed for it's flexibility. ```{note} `mistune` is not CommonMark compliant, which is what allows for its faster parsing, at the expense of issues, for example, with nested inline parsing. -See [mistletoes's explanation](https://github.com/miyuchina/mistletoe#performance) +See [mistletoes's explanation](https://github.com/miyuchina/mistletoe/blob/master/performance.md) for further details. ``` diff --git a/docs/using.md b/docs/using.md index 5c977b5a..83872037 100644 --- a/docs/using.md +++ b/docs/using.md @@ -16,7 +16,7 @@ kernelspec: > This document can be opened to execute with [Jupytext](https://jupytext.readthedocs.io)! -markdown-it-py may be used as an API *via* the `markdown_it` package. +markdown-it-py may be used as an API *via* the [`markdown-it-py`](https://pypi.org/project/markdown-it-py/) package. The raw text is first parsed to syntax 'tokens', then these are converted to other formats using 'renderers'. diff --git a/docstring.fmt.mustache b/docstring.fmt.mustache deleted file mode 100644 index 717a4572..00000000 --- a/docstring.fmt.mustache +++ /dev/null @@ -1,20 +0,0 @@ -{{! Sphinx Docstring Template }} -{{summaryPlaceholder}} - -{{extendedSummaryPlaceholder}} - -{{#args}} -:param {{var}}: {{descriptionPlaceholder}} -{{/args}} -{{#kwargs}} -:param {{var}}: {{descriptionPlaceholder}} -{{/kwargs}} -{{#exceptions}} -:raises {{type}}: {{descriptionPlaceholder}} -{{/exceptions}} -{{#returns}} -:return: {{descriptionPlaceholder}} -{{/returns}} -{{#yields}} -:yield: {{descriptionPlaceholder}} -{{/yields}} diff --git a/markdown_it/__init__.py b/markdown_it/__init__.py index 887f4f53..5cc232a5 100644 --- a/markdown_it/__init__.py +++ b/markdown_it/__init__.py @@ -1,5 +1,5 @@ """A Python port of Markdown-It""" __all__ = ("MarkdownIt",) -__version__ = "2.0.1" +__version__ = "2.1.0" from .main import MarkdownIt diff --git a/markdown_it/_compat.py b/markdown_it/_compat.py new file mode 100644 index 00000000..12df1aa6 --- /dev/null +++ b/markdown_it/_compat.py @@ -0,0 +1,10 @@ +from __future__ import annotations + +from collections.abc import Mapping +import sys +from typing import Any + +if sys.version_info >= (3, 10): + DATACLASS_KWARGS: Mapping[str, Any] = {"slots": True} +else: + DATACLASS_KWARGS: Mapping[str, Any] = {} diff --git a/markdown_it/cli/parse.py b/markdown_it/cli/parse.py index 353c526f..2d74f55a 100644 --- a/markdown_it/cli/parse.py +++ b/markdown_it/cli/parse.py @@ -4,18 +4,19 @@ Parse one or more markdown files, convert each to HTML, and print to stdout. """ +from __future__ import annotations + import argparse +from collections.abc import Iterable, Sequence import sys -from typing import Iterable, Optional, Sequence from markdown_it import __version__ from markdown_it.main import MarkdownIt - version_str = "markdown-it-py [version {}]".format(__version__) -def main(args: Optional[Sequence[str]] = None) -> int: +def main(args: Sequence[str] | None = None) -> int: namespace = parse_args(args) if namespace.filenames: convert(namespace.filenames) @@ -63,7 +64,7 @@ def interactive() -> None: break -def parse_args(args: Optional[Sequence[str]]) -> argparse.Namespace: +def parse_args(args: Sequence[str] | None) -> argparse.Namespace: """Parse input CLI arguments.""" parser = argparse.ArgumentParser( description="Parse one or more markdown files, " diff --git a/markdown_it/common/html_blocks.py b/markdown_it/common/html_blocks.py index 4246f788..8b199af3 100644 --- a/markdown_it/common/html_blocks.py +++ b/markdown_it/common/html_blocks.py @@ -1,4 +1,4 @@ -"""List of valid html blocks names, accorting to commonmark spec +"""List of valid html blocks names, according to commonmark spec http://jgm.github.io/CommonMark/spec.html#html-blocks """ diff --git a/markdown_it/common/normalize_url.py b/markdown_it/common/normalize_url.py index d1ab85e3..afec9284 100644 --- a/markdown_it/common/normalize_url.py +++ b/markdown_it/common/normalize_url.py @@ -1,12 +1,13 @@ +from __future__ import annotations + +from collections.abc import Callable import re -from typing import Callable, Optional -from urllib.parse import urlparse, urlunparse, quote, unquote # noqa: F401 +from urllib.parse import quote, unquote, urlparse, urlunparse # noqa: F401 import mdurl from .. import _punycode - RECODE_HOSTNAME_FOR = ("http:", "https:", "mailto:") @@ -67,7 +68,7 @@ def normalizeLinkText(url: str) -> str: GOOD_DATA_RE = re.compile(r"^data:image\/(gif|png|jpeg|webp);") -def validateLink(url: str, validator: Optional[Callable] = None) -> bool: +def validateLink(url: str, validator: Callable | None = None) -> bool: """Validate URL link is allowed in output. This validator can prohibit more than really needed to prevent XSS. diff --git a/markdown_it/helpers/__init__.py b/markdown_it/helpers/__init__.py index f76bd444..3dbbdd1d 100644 --- a/markdown_it/helpers/__init__.py +++ b/markdown_it/helpers/__init__.py @@ -1,6 +1,6 @@ """Functions for parsing Links """ __all__ = ("parseLinkLabel", "parseLinkDestination", "parseLinkTitle") -from .parse_link_label import parseLinkLabel from .parse_link_destination import parseLinkDestination +from .parse_link_label import parseLinkLabel from .parse_link_title import parseLinkTitle diff --git a/markdown_it/helpers/parse_link_destination.py b/markdown_it/helpers/parse_link_destination.py index 74dbec08..58b76f3c 100644 --- a/markdown_it/helpers/parse_link_destination.py +++ b/markdown_it/helpers/parse_link_destination.py @@ -2,7 +2,7 @@ Parse link destination """ -from ..common.utils import unescapeAll, charCodeAt +from ..common.utils import charCodeAt, unescapeAll class _Result: diff --git a/markdown_it/helpers/parse_link_title.py b/markdown_it/helpers/parse_link_title.py index 0cb1365b..842c83bc 100644 --- a/markdown_it/helpers/parse_link_title.py +++ b/markdown_it/helpers/parse_link_title.py @@ -1,6 +1,6 @@ """Parse link title """ -from ..common.utils import unescapeAll, charCodeAt +from ..common.utils import charCodeAt, unescapeAll class _Result: diff --git a/markdown_it/main.py b/markdown_it/main.py index e87a7a44..7faac5ad 100644 --- a/markdown_it/main.py +++ b/markdown_it/main.py @@ -1,25 +1,17 @@ +from __future__ import annotations + +from collections.abc import Callable, Generator, Iterable, Mapping, MutableMapping from contextlib import contextmanager -from typing import ( - Any, - Callable, - Dict, - Generator, - Iterable, - List, - Mapping, - MutableMapping, - Optional, - Union, -) +from typing import Any from . import helpers, presets # noqa F401 from .common import normalize_url, utils # noqa F401 -from .token import Token -from .parser_core import ParserCore # noqa F401 from .parser_block import ParserBlock # noqa F401 +from .parser_core import ParserCore # noqa F401 from .parser_inline import ParserInline # noqa F401 -from .rules_core.state_core import StateCore from .renderer import RendererHTML, RendererProtocol +from .rules_core.state_core import StateCore +from .token import Token from .utils import OptionsDict try: @@ -40,10 +32,10 @@ class MarkdownIt: def __init__( self, - config: Union[str, Mapping] = "commonmark", - options_update: Optional[Mapping] = None, + config: str | Mapping = "commonmark", + options_update: Mapping | None = None, *, - renderer_cls: Callable[["MarkdownIt"], RendererProtocol] = RendererHTML, + renderer_cls: Callable[[MarkdownIt], RendererProtocol] = RendererHTML, ): """Main parser class @@ -94,8 +86,8 @@ def set(self, options: MutableMapping) -> None: self.options = OptionsDict(options) def configure( - self, presets: Union[str, Mapping], options_update: Optional[Mapping] = None - ) -> "MarkdownIt": + self, presets: str | Mapping, options_update: Mapping | None = None + ) -> MarkdownIt: """Batch load of all options and component settings. This is an internal method, and you probably will not need it. But if you will - see available presets and data structure @@ -131,7 +123,7 @@ def configure( return self - def get_all_rules(self) -> Dict[str, List[str]]: + def get_all_rules(self) -> dict[str, list[str]]: """Return the names of all active rules.""" rules = { chain: self[chain].ruler.get_all_rules() @@ -140,7 +132,7 @@ def get_all_rules(self) -> Dict[str, List[str]]: rules["inline2"] = self.inline.ruler2.get_all_rules() return rules - def get_active_rules(self) -> Dict[str, List[str]]: + def get_active_rules(self) -> dict[str, list[str]]: """Return the names of all active rules.""" rules = { chain: self[chain].ruler.get_active_rules() @@ -150,8 +142,8 @@ def get_active_rules(self) -> Dict[str, List[str]]: return rules def enable( - self, names: Union[str, Iterable[str]], ignoreInvalid: bool = False - ) -> "MarkdownIt": + self, names: str | Iterable[str], ignoreInvalid: bool = False + ) -> MarkdownIt: """Enable list or rules. (chainable) :param names: rule name or list of rule names to enable. @@ -182,8 +174,8 @@ def enable( return self def disable( - self, names: Union[str, Iterable[str]], ignoreInvalid: bool = False - ) -> "MarkdownIt": + self, names: str | Iterable[str], ignoreInvalid: bool = False + ) -> MarkdownIt: """The same as [[MarkdownIt.enable]], but turn specified rules off. (chainable) :param names: rule name or list of rule names to disable. @@ -222,7 +214,7 @@ def add_render_rule(self, name: str, function: Callable, fmt: str = "html") -> N if self.renderer.__output__ == fmt: self.renderer.rules[name] = function.__get__(self.renderer) # type: ignore - def use(self, plugin: Callable, *params, **options) -> "MarkdownIt": + def use(self, plugin: Callable, *params, **options) -> MarkdownIt: """Load specified plugin with given params into current parser instance. (chainable) It's just a sugar to call `plugin(md, params)` with curring. @@ -237,7 +229,7 @@ def func(tokens, idx): plugin(self, *params, **options) return self - def parse(self, src: str, env: Optional[MutableMapping] = None) -> List[Token]: + def parse(self, src: str, env: MutableMapping | None = None) -> list[Token]: """Parse the source string to a token stream :param src: source string @@ -260,7 +252,7 @@ def parse(self, src: str, env: Optional[MutableMapping] = None) -> List[Token]: self.core.process(state) return state.tokens - def render(self, src: str, env: Optional[MutableMapping] = None) -> Any: + def render(self, src: str, env: MutableMapping | None = None) -> Any: """Render markdown string into html. It does all magic for you :). :param src: source string @@ -274,9 +266,7 @@ def render(self, src: str, env: Optional[MutableMapping] = None) -> Any: env = {} if env is None else env return self.renderer.render(self.parse(src, env), self.options, env) - def parseInline( - self, src: str, env: Optional[MutableMapping] = None - ) -> List[Token]: + def parseInline(self, src: str, env: MutableMapping | None = None) -> list[Token]: """The same as [[MarkdownIt.parse]] but skip all block rules. :param src: source string @@ -296,7 +286,7 @@ def parseInline( self.core.process(state) return state.tokens - def renderInline(self, src: str, env: Optional[MutableMapping] = None) -> Any: + def renderInline(self, src: str, env: MutableMapping | None = None) -> Any: """Similar to [[MarkdownIt.render]] but for single paragraph content. :param src: source string diff --git a/markdown_it/parser_block.py b/markdown_it/parser_block.py index 8c0e8ab3..f331ec54 100644 --- a/markdown_it/parser_block.py +++ b/markdown_it/parser_block.py @@ -1,16 +1,17 @@ """Block-level tokenizer.""" +from __future__ import annotations + import logging -from typing import List, Optional, Tuple +from . import rules_block from .ruler import Ruler -from .token import Token from .rules_block.state_block import StateBlock -from . import rules_block +from .token import Token LOGGER = logging.getLogger(__name__) -_rules: List[Tuple] = [ +_rules: list[tuple] = [ # First 2 params - rule name & source. Secondary array - list of rules, # which can be terminated by this one. ("table", rules_block.table, ["paragraph", "reference"]), @@ -97,9 +98,9 @@ def parse( src: str, md, env, - outTokens: List[Token], - ords: Optional[Tuple[int, ...]] = None, - ) -> Optional[List[Token]]: + outTokens: list[Token], + ords: tuple[int, ...] | None = None, + ) -> list[Token] | None: """Process input string and push block tokens into `outTokens`.""" if not src: return None diff --git a/markdown_it/parser_core.py b/markdown_it/parser_core.py index 03982b5c..32209b32 100644 --- a/markdown_it/parser_core.py +++ b/markdown_it/parser_core.py @@ -4,14 +4,13 @@ * Top-level rules executor. Glues block/inline parsers and does intermediate * transformations. """ -from typing import List, Tuple +from __future__ import annotations -from .ruler import Ruler, RuleFunc +from .ruler import RuleFunc, Ruler +from .rules_core import block, inline, linkify, normalize, replace, smartquotes from .rules_core.state_core import StateCore -from .rules_core import normalize, block, inline, replace, smartquotes, linkify - -_rules: List[Tuple[str, RuleFunc]] = [ +_rules: list[tuple[str, RuleFunc]] = [ ("normalize", normalize), ("block", block), ("inline", inline), diff --git a/markdown_it/parser_inline.py b/markdown_it/parser_inline.py index 30fcff98..b61c990b 100644 --- a/markdown_it/parser_inline.py +++ b/markdown_it/parser_inline.py @@ -1,14 +1,14 @@ """Tokenizes paragraph content. """ -from typing import List, Tuple +from __future__ import annotations -from .ruler import Ruler, RuleFunc -from .token import Token -from .rules_inline.state_inline import StateInline from . import rules_inline +from .ruler import RuleFunc, Ruler +from .rules_inline.state_inline import StateInline +from .token import Token # Parser rules -_rules: List[Tuple[str, RuleFunc]] = [ +_rules: list[tuple[str, RuleFunc]] = [ ("text", rules_inline.text), ("newline", rules_inline.newline), ("escape", rules_inline.escape), @@ -22,7 +22,7 @@ ("entity", rules_inline.entity), ] -_rules2: List[Tuple[str, RuleFunc]] = [ +_rules2: list[tuple[str, RuleFunc]] = [ ("balance_pairs", rules_inline.link_pairs), ("strikethrough", rules_inline.strikethrough.postProcess), ("emphasis", rules_inline.emphasis.postProcess), @@ -114,7 +114,7 @@ def tokenize(self, state: StateInline) -> None: if state.pending: state.pushPending() - def parse(self, src: str, md, env, tokens: List[Token]) -> List[Token]: + def parse(self, src: str, md, env, tokens: list[Token]) -> list[Token]: """Process input string and push inline tokens into `tokens`""" state = StateInline(src, md, env, tokens) self.tokenize(state) diff --git a/markdown_it/port.yaml b/markdown_it/port.yaml index dd707761..a6718fda 100644 --- a/markdown_it/port.yaml +++ b/markdown_it/port.yaml @@ -1,7 +1,7 @@ - package: markdown-it/markdown-it - version: 12.1.0 - commit: e5986bb7cca20ac95dc81e4741c08949bf01bb77 - date: Jul 15, 2021 + version: 12.2.0 + commit: 6e2de08a0b03d3d0dcc524b89710ce05f83a0283 + date: Aug 2, 2021 notes: - Rename variables that use python built-in names, e.g. - `max` -> `maximum` @@ -45,5 +45,5 @@ `MarkdownIt.add_render_rule(name, function, fmt="html")`, rather than `MarkdownIt.renderer.rules[name] = function` and renderers should declare a class property `__output__ = "html"`. - This allows for extensability to more than just HTML renderers + This allows for extensibility to more than just HTML renderers - inline tokens in tables are assigned a map (this is helpful for propagation to children) diff --git a/markdown_it/presets/default.py b/markdown_it/presets/default.py index 3f347913..59f4855e 100644 --- a/markdown_it/presets/default.py +++ b/markdown_it/presets/default.py @@ -23,7 +23,7 @@ def make(): "breaks": False, # Convert '\n' in paragraphs into
"langPrefix": "language-", # CSS language prefix for fenced blocks # Highlighter function. Should return escaped HTML, - # or '' if the source string is not changed and should be escaped externaly. + # or '' if the source string is not changed and should be escaped externally. # If result starts with "langPrefix": "language-", # CSS language prefix for fenced blocks # Highlighter function. Should return escaped HTML, - # or '' if the source string is not changed and should be escaped externaly. + # or '' if the source string is not changed and should be escaped externally. # If result starts with str: def renderInlineAsText( self, - tokens: Optional[Sequence[Token]], + tokens: Sequence[Token] | None, options: OptionsDict, env: MutableMapping, ) -> str: diff --git a/markdown_it/ruler.py b/markdown_it/ruler.py index 997c95dc..11b937a0 100644 --- a/markdown_it/ruler.py +++ b/markdown_it/ruler.py @@ -9,33 +9,28 @@ class Ruler - enable/disable rules - add/replace rules - allow assign rules to additional named chains (in the same) -- cacheing lists of active rules +- caching lists of active rules You will not need use this class directly until write plugins. For simple rules control use [[MarkdownIt.disable]], [[MarkdownIt.enable]] and [[MarkdownIt.use]]. """ -from typing import ( - Callable, - Dict, - Iterable, - List, - MutableMapping, - Optional, - Tuple, - TYPE_CHECKING, - Union, -) -import attr +from __future__ import annotations + +from collections.abc import Callable, Iterable, MutableMapping +from dataclasses import dataclass, field +from typing import TYPE_CHECKING + +from markdown_it._compat import DATACLASS_KWARGS if TYPE_CHECKING: from markdown_it import MarkdownIt class StateBase: - srcCharCode: Tuple[int, ...] + srcCharCode: tuple[int, ...] - def __init__(self, src: str, md: "MarkdownIt", env: MutableMapping): + def __init__(self, src: str, md: MarkdownIt, env: MutableMapping): self.src = src self.env = env self.md = md @@ -57,22 +52,22 @@ def src(self, value: str) -> None: RuleFunc = Callable -@attr.s(slots=True) +@dataclass(**DATACLASS_KWARGS) class Rule: - name: str = attr.ib() - enabled: bool = attr.ib() - fn: RuleFunc = attr.ib(repr=False) - alt: List[str] = attr.ib() + name: str + enabled: bool + fn: RuleFunc = field(repr=False) + alt: list[str] class Ruler: def __init__(self): # List of added rules. - self.__rules__: List[Rule] = [] + self.__rules__: list[Rule] = [] # Cached rule chains. # First level - chain name, '' for default. # Second level - diginal anchor for fast filtering by charcodes. - self.__cache__: Optional[Dict[str, List[RuleFunc]]] = None + self.__cache__: dict[str, list[RuleFunc]] | None = None def __find__(self, name: str) -> int: """Find rule index by name""" @@ -161,7 +156,7 @@ def push(self, ruleName: str, fn: RuleFunc, options=None): self.__rules__.append(Rule(ruleName, True, fn, (options or {}).get("alt", []))) self.__cache__ = None - def enable(self, names: Union[str, Iterable[str]], ignoreInvalid: bool = False): + def enable(self, names: str | Iterable[str], ignoreInvalid: bool = False): """Enable rules with given names. :param names: name or list of rule names to enable. @@ -183,7 +178,7 @@ def enable(self, names: Union[str, Iterable[str]], ignoreInvalid: bool = False): self.__cache__ = None return result - def enableOnly(self, names: Union[str, Iterable[str]], ignoreInvalid: bool = False): + def enableOnly(self, names: str | Iterable[str], ignoreInvalid: bool = False): """Enable rules with given names, and disable everything else. :param names: name or list of rule names to enable. @@ -197,7 +192,7 @@ def enableOnly(self, names: Union[str, Iterable[str]], ignoreInvalid: bool = Fal rule.enabled = False self.enable(names, ignoreInvalid) - def disable(self, names: Union[str, Iterable[str]], ignoreInvalid: bool = False): + def disable(self, names: str | Iterable[str], ignoreInvalid: bool = False): """Disable rules with given names. :param names: name or list of rule names to enable. @@ -219,7 +214,7 @@ def disable(self, names: Union[str, Iterable[str]], ignoreInvalid: bool = False) self.__cache__ = None return result - def getRules(self, chainName: str) -> List[RuleFunc]: + def getRules(self, chainName: str) -> list[RuleFunc]: """Return array of active functions (rules) for given chain name. It analyzes rules configuration, compiles caches if not exists and returns result. @@ -233,10 +228,10 @@ def getRules(self, chainName: str) -> List[RuleFunc]: # Chain can be empty, if rules disabled. But we still have to return Array. return self.__cache__.get(chainName, []) or [] - def get_all_rules(self) -> List[str]: + def get_all_rules(self) -> list[str]: """Return all available rule names.""" return [r.name for r in self.__rules__] - def get_active_rules(self) -> List[str]: + def get_active_rules(self) -> list[str]: """Return the active rule names.""" return [r.name for r in self.__rules__ if r.enabled] diff --git a/markdown_it/rules_block/__init__.py b/markdown_it/rules_block/__init__.py index c1660b89..bcf138df 100644 --- a/markdown_it/rules_block/__init__.py +++ b/markdown_it/rules_block/__init__.py @@ -13,15 +13,15 @@ "table", ) -from .state_block import StateBlock -from .paragraph import paragraph -from .heading import heading -from .lheading import lheading +from .blockquote import blockquote from .code import code from .fence import fence +from .heading import heading from .hr import hr +from .html_block import html_block +from .lheading import lheading from .list import list_block +from .paragraph import paragraph from .reference import reference -from .blockquote import blockquote -from .html_block import html_block +from .state_block import StateBlock from .table import table diff --git a/markdown_it/rules_block/blockquote.py b/markdown_it/rules_block/blockquote.py index 543c1f9a..6575731d 100644 --- a/markdown_it/rules_block/blockquote.py +++ b/markdown_it/rules_block/blockquote.py @@ -1,9 +1,10 @@ # Block quotes +from __future__ import annotations + import logging -from typing import Optional -from .state_block import StateBlock from ..common.utils import isSpace +from .state_block import StateBlock LOGGER = logging.getLogger(__name__) @@ -36,7 +37,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool): initial = offset = state.sCount[startLine] + 1 try: - second_char_code: Optional[int] = state.srcCharCode[pos] + second_char_code: int | None = state.srcCharCode[pos] except IndexError: second_char_code = None @@ -155,7 +156,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool): initial = offset = state.sCount[nextLine] + 1 try: - next_char: Optional[int] = state.srcCharCode[pos] + next_char: int | None = state.srcCharCode[pos] except IndexError: next_char = None @@ -295,6 +296,4 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool): state.blkIndent = oldIndent - state.lineMax += 1 - return True diff --git a/markdown_it/rules_block/code.py b/markdown_it/rules_block/code.py index 6d9c87a3..c4fdba33 100644 --- a/markdown_it/rules_block/code.py +++ b/markdown_it/rules_block/code.py @@ -1,5 +1,6 @@ """Code block (4 spaces padded).""" import logging + from .state_block import StateBlock LOGGER = logging.getLogger(__name__) diff --git a/markdown_it/rules_block/heading.py b/markdown_it/rules_block/heading.py index 353520a3..8d4ef3e2 100644 --- a/markdown_it/rules_block/heading.py +++ b/markdown_it/rules_block/heading.py @@ -1,9 +1,10 @@ """ Atex heading (#, ##, ...) """ +from __future__ import annotations + import logging -from typing import Optional -from .state_block import StateBlock from ..common.utils import isSpace +from .state_block import StateBlock LOGGER = logging.getLogger(__name__) @@ -19,7 +20,7 @@ def heading(state: StateBlock, startLine: int, endLine: int, silent: bool): if state.sCount[startLine] - state.blkIndent >= 4: return False - ch: Optional[int] = state.srcCharCode[pos] + ch: int | None = state.srcCharCode[pos] # /* # */ if ch != 0x23 or pos >= maximum: diff --git a/markdown_it/rules_block/hr.py b/markdown_it/rules_block/hr.py index 01c68552..804cd9db 100644 --- a/markdown_it/rules_block/hr.py +++ b/markdown_it/rules_block/hr.py @@ -7,7 +7,6 @@ from ..common.utils import isSpace from .state_block import StateBlock - LOGGER = logging.getLogger(__name__) diff --git a/markdown_it/rules_block/html_block.py b/markdown_it/rules_block/html_block.py index 3bb850e5..31afab76 100644 --- a/markdown_it/rules_block/html_block.py +++ b/markdown_it/rules_block/html_block.py @@ -1,17 +1,18 @@ # HTML block +from __future__ import annotations + import logging import re -from typing import List, Tuple, Pattern -from .state_block import StateBlock from ..common.html_blocks import block_names from ..common.html_re import HTML_OPEN_CLOSE_TAG_STR +from .state_block import StateBlock LOGGER = logging.getLogger(__name__) # An array of opening and corresponding closing sequences for html tags, # last argument defines whether it can terminate a paragraph or not -HTML_SEQUENCES: List[Tuple[Pattern, Pattern, bool]] = [ +HTML_SEQUENCES: list[tuple[re.Pattern, re.Pattern, bool]] = [ ( re.compile(r"^<(script|pre|style|textarea)(?=(\s|>|$))", re.IGNORECASE), re.compile(r"<\/(script|pre|style|textarea)>", re.IGNORECASE), diff --git a/markdown_it/rules_block/list.py b/markdown_it/rules_block/list.py index a5318c96..a7617ad2 100644 --- a/markdown_it/rules_block/list.py +++ b/markdown_it/rules_block/list.py @@ -1,8 +1,8 @@ # Lists import logging -from .state_block import StateBlock from ..common.utils import isSpace +from .state_block import StateBlock LOGGER = logging.getLogger(__name__) @@ -230,6 +230,8 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool): token = state.push("list_item_open", "li", 1) token.markup = chr(markerCharCode) token.map = itemLines = [startLine, 0] + if isOrdered: + token.info = state.src[start : posAfterMarker - 1] # change current state, then restore it after parser subcall oldTight = state.tight @@ -313,6 +315,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool): posAfterMarker = skipOrderedListMarker(state, nextLine) if posAfterMarker < 0: break + start = state.bMarks[nextLine] + state.tShift[nextLine] else: posAfterMarker = skipBulletListMarker(state, nextLine) if posAfterMarker < 0: diff --git a/markdown_it/rules_block/reference.py b/markdown_it/rules_block/reference.py index 1704d806..35adde2a 100644 --- a/markdown_it/rules_block/reference.py +++ b/markdown_it/rules_block/reference.py @@ -1,9 +1,8 @@ import logging -from ..common.utils import isSpace, normalizeReference, charCodeAt +from ..common.utils import charCodeAt, isSpace, normalizeReference from .state_block import StateBlock - LOGGER = logging.getLogger(__name__) diff --git a/markdown_it/rules_block/state_block.py b/markdown_it/rules_block/state_block.py index 69ad0c4d..42b8fce3 100644 --- a/markdown_it/rules_block/state_block.py +++ b/markdown_it/rules_block/state_block.py @@ -1,8 +1,10 @@ -from typing import List, Optional, Tuple, TYPE_CHECKING +from __future__ import annotations + +from typing import TYPE_CHECKING -from ..token import Token -from ..ruler import StateBase from ..common.utils import isSpace +from ..ruler import StateBase +from ..token import Token if TYPE_CHECKING: from markdown_it.main import MarkdownIt @@ -12,10 +14,10 @@ class StateBlock(StateBase): def __init__( self, src: str, - md: "MarkdownIt", + md: MarkdownIt, env, - tokens: List[Token], - srcCharCode: Optional[Tuple[int, ...]] = None, + tokens: list[Token], + srcCharCode: tuple[int, ...] | None = None, ): if srcCharCode is not None: @@ -145,8 +147,7 @@ def skipEmptyLines(self, from_pos: int) -> int: ]: break except IndexError: - from_pos += 1 - break + pass from_pos += 1 return from_pos diff --git a/markdown_it/rules_block/table.py b/markdown_it/rules_block/table.py index 8c2c5927..e3db8584 100644 --- a/markdown_it/rules_block/table.py +++ b/markdown_it/rules_block/table.py @@ -1,9 +1,8 @@ # GFM table, https://github.github.com/gfm/#tables-extension- import re +from ..common.utils import charCodeAt, isSpace from .state_block import StateBlock -from ..common.utils import isSpace, charCodeAt - headerLineRe = re.compile(r"^:?-+:?$") enclosingPipesRe = re.compile(r"^\||\|$") diff --git a/markdown_it/rules_core/__init__.py b/markdown_it/rules_core/__init__.py index 7f5de3e4..f80034c5 100644 --- a/markdown_it/rules_core/__init__.py +++ b/markdown_it/rules_core/__init__.py @@ -8,10 +8,10 @@ "linkify", ) -from .state_core import StateCore -from .normalize import normalize from .block import block from .inline import inline +from .linkify import linkify +from .normalize import normalize from .replacements import replace from .smartquotes import smartquotes -from .linkify import linkify +from .state_core import StateCore diff --git a/markdown_it/rules_core/linkify.py b/markdown_it/rules_core/linkify.py index 0acc6f11..49bb4ef3 100644 --- a/markdown_it/rules_core/linkify.py +++ b/markdown_it/rules_core/linkify.py @@ -1,9 +1,8 @@ import re from ..common.utils import arrayReplaceAt -from .state_core import StateCore from ..token import Token - +from .state_core import StateCore LINK_OPEN_RE = re.compile(r"^\s]", flags=re.IGNORECASE) LINK_CLOSE_RE = re.compile(r"^", flags=re.IGNORECASE) diff --git a/markdown_it/rules_core/normalize.py b/markdown_it/rules_core/normalize.py index 14b2f679..bf16fd7a 100644 --- a/markdown_it/rules_core/normalize.py +++ b/markdown_it/rules_core/normalize.py @@ -3,7 +3,6 @@ from .state_core import StateCore - # https://spec.commonmark.org/0.29/#line-ending NEWLINES_RE = re.compile(r"\r\n?|\n") NULL_RE = re.compile(r"\0") diff --git a/markdown_it/rules_core/replacements.py b/markdown_it/rules_core/replacements.py index ee3b9edb..45377d3e 100644 --- a/markdown_it/rules_core/replacements.py +++ b/markdown_it/rules_core/replacements.py @@ -14,12 +14,13 @@ * ``--`` → &ndash * ``---`` → &mdash """ +from __future__ import annotations + import logging import re -from typing import List, Match -from .state_core import StateCore from ..token import Token +from .state_core import StateCore LOGGER = logging.getLogger(__name__) @@ -55,11 +56,11 @@ SCOPED_ABBR = {"c": "©", "r": "®", "p": "§", "tm": "™"} -def replaceFn(match: Match[str]): +def replaceFn(match: re.Match[str]): return SCOPED_ABBR[match.group(1).lower()] -def replace_scoped(inlineTokens: List[Token]) -> None: +def replace_scoped(inlineTokens: list[Token]) -> None: inside_autolink = 0 for token in inlineTokens: @@ -73,7 +74,7 @@ def replace_scoped(inlineTokens: List[Token]) -> None: inside_autolink += 1 -def replace_rare(inlineTokens: List[Token]) -> None: +def replace_rare(inlineTokens: list[Token]) -> None: inside_autolink = 0 for token in inlineTokens: diff --git a/markdown_it/rules_core/smartquotes.py b/markdown_it/rules_core/smartquotes.py index c3211191..93f8be28 100644 --- a/markdown_it/rules_core/smartquotes.py +++ b/markdown_it/rules_core/smartquotes.py @@ -1,13 +1,13 @@ """Convert straight quotation marks to typographic ones """ +from __future__ import annotations + import re -from typing import Any, Dict, List +from typing import Any -from .state_core import StateCore -from ..common.utils import charCodeAt -from ..common.utils import isWhiteSpace, isPunctChar, isMdAsciiPunct +from ..common.utils import charCodeAt, isMdAsciiPunct, isPunctChar, isWhiteSpace from ..token import Token - +from .state_core import StateCore QUOTE_TEST_RE = re.compile(r"['\"]") QUOTE_RE = re.compile(r"['\"]") @@ -21,8 +21,8 @@ def replaceAt(string: str, index: int, ch: str) -> str: return string[:index] + ch + string[index + 1 :] -def process_inlines(tokens: List[Token], state: StateCore) -> None: - stack: List[Dict[str, Any]] = [] +def process_inlines(tokens: list[Token], state: StateCore) -> None: + stack: list[dict[str, Any]] = [] for i in range(len(tokens)): token = tokens[i] diff --git a/markdown_it/rules_core/state_core.py b/markdown_it/rules_core/state_core.py index a560a283..15b7c605 100644 --- a/markdown_it/rules_core/state_core.py +++ b/markdown_it/rules_core/state_core.py @@ -1,7 +1,10 @@ -from typing import List, MutableMapping, Optional, TYPE_CHECKING +from __future__ import annotations + +from collections.abc import MutableMapping +from typing import TYPE_CHECKING -from ..token import Token from ..ruler import StateBase +from ..token import Token if TYPE_CHECKING: from markdown_it import MarkdownIt @@ -11,12 +14,12 @@ class StateCore(StateBase): def __init__( self, src: str, - md: "MarkdownIt", + md: MarkdownIt, env: MutableMapping, - tokens: Optional[List[Token]] = None, + tokens: list[Token] | None = None, ): self.src = src self.md = md # link to parser instance self.env = env - self.tokens: List[Token] = tokens or [] + self.tokens: list[Token] = tokens or [] self.inlineMode = False diff --git a/markdown_it/rules_inline/__init__.py b/markdown_it/rules_inline/__init__.py index 0cce406b..f27907ce 100644 --- a/markdown_it/rules_inline/__init__.py +++ b/markdown_it/rules_inline/__init__.py @@ -14,17 +14,16 @@ "html_inline", "strikethrough", ) -from .state_inline import StateInline -from .text import text -from .text_collapse import text_collapse +from . import emphasis, strikethrough +from .autolink import autolink +from .backticks import backtick from .balance_pairs import link_pairs +from .entity import entity from .escape import escape -from .newline import newline -from .backticks import backtick -from . import emphasis +from .html_inline import html_inline from .image import image from .link import link -from .autolink import autolink -from .entity import entity -from .html_inline import html_inline -from . import strikethrough +from .newline import newline +from .state_inline import StateInline +from .text import text +from .text_collapse import text_collapse diff --git a/markdown_it/rules_inline/autolink.py b/markdown_it/rules_inline/autolink.py index 6a55e49a..a4ee61c3 100644 --- a/markdown_it/rules_inline/autolink.py +++ b/markdown_it/rules_inline/autolink.py @@ -1,5 +1,6 @@ # Process autolinks '' import re + from .state_inline import StateInline EMAIL_RE = re.compile( diff --git a/markdown_it/rules_inline/emphasis.py b/markdown_it/rules_inline/emphasis.py index ef32c8d9..9001b09e 100644 --- a/markdown_it/rules_inline/emphasis.py +++ b/markdown_it/rules_inline/emphasis.py @@ -1,7 +1,7 @@ # Process *this* and _that_ # -from .state_inline import StateInline, Delimiter +from .state_inline import Delimiter, StateInline def tokenize(state: StateInline, silent: bool): diff --git a/markdown_it/rules_inline/entity.py b/markdown_it/rules_inline/entity.py index 8354e6c7..883a9666 100644 --- a/markdown_it/rules_inline/entity.py +++ b/markdown_it/rules_inline/entity.py @@ -2,7 +2,7 @@ import re from ..common.entities import entities -from ..common.utils import isValidEntityCode, fromCodePoint +from ..common.utils import fromCodePoint, isValidEntityCode from .state_inline import StateInline DIGITAL_RE = re.compile(r"^&#((?:x[a-f0-9]{1,6}|[0-9]{1,7}));", re.IGNORECASE) diff --git a/markdown_it/rules_inline/escape.py b/markdown_it/rules_inline/escape.py index 64d9a678..36bd0402 100644 --- a/markdown_it/rules_inline/escape.py +++ b/markdown_it/rules_inline/escape.py @@ -1,9 +1,8 @@ """ Process escaped chars and hardbreaks """ -from .state_inline import StateInline from ..common.utils import isSpace - +from .state_inline import StateInline ESCAPED = [0 for _ in range(256)] for ch in "\\!\"#$%&'()*+,./:;<=>?@[]^_`{|}~-": diff --git a/markdown_it/rules_inline/html_inline.py b/markdown_it/rules_inline/html_inline.py index 7333e370..295cc5c7 100644 --- a/markdown_it/rules_inline/html_inline.py +++ b/markdown_it/rules_inline/html_inline.py @@ -1,6 +1,6 @@ # Process html tags -from .state_inline import StateInline from ..common.html_re import HTML_TAG_RE +from .state_inline import StateInline def isLetter(ch: int): diff --git a/markdown_it/rules_inline/image.py b/markdown_it/rules_inline/image.py index d3813f77..d2a08d47 100644 --- a/markdown_it/rules_inline/image.py +++ b/markdown_it/rules_inline/image.py @@ -1,10 +1,9 @@ # Process ![image]( "title") +from __future__ import annotations -from typing import List - -from .state_inline import StateInline -from ..token import Token from ..common.utils import isSpace, normalizeReference +from ..token import Token +from .state_inline import StateInline def image(state: StateInline, silent: bool): @@ -132,7 +131,7 @@ def image(state: StateInline, silent: bool): if not silent: content = state.src[labelStart:labelEnd] - tokens: List[Token] = [] + tokens: list[Token] = [] state.md.inline.parse(content, state.md, state.env, tokens) token = state.push("image", "img", 0) diff --git a/markdown_it/rules_inline/link.py b/markdown_it/rules_inline/link.py index 919ccf12..2394d6c3 100644 --- a/markdown_it/rules_inline/link.py +++ b/markdown_it/rules_inline/link.py @@ -1,6 +1,6 @@ # Process [link]( "stuff") -from ..common.utils import normalizeReference, isSpace +from ..common.utils import isSpace, normalizeReference from .state_inline import StateInline diff --git a/markdown_it/rules_inline/newline.py b/markdown_it/rules_inline/newline.py index b4b8a67f..3034e408 100644 --- a/markdown_it/rules_inline/newline.py +++ b/markdown_it/rules_inline/newline.py @@ -1,8 +1,8 @@ # Proceess '\n' import re -from .state_inline import StateInline from ..common.utils import charCodeAt, isSpace +from .state_inline import StateInline endSpace = re.compile(r" +$") diff --git a/markdown_it/rules_inline/state_inline.py b/markdown_it/rules_inline/state_inline.py index 54555411..283532cc 100644 --- a/markdown_it/rules_inline/state_inline.py +++ b/markdown_it/rules_inline/state_inline.py @@ -1,23 +1,26 @@ -from collections import namedtuple -from typing import Dict, List, MutableMapping, Optional, TYPE_CHECKING +from __future__ import annotations -import attr +from collections import namedtuple +from collections.abc import MutableMapping +from dataclasses import dataclass +from typing import TYPE_CHECKING -from ..token import Token +from .._compat import DATACLASS_KWARGS +from ..common.utils import isMdAsciiPunct, isPunctChar, isWhiteSpace from ..ruler import StateBase -from ..common.utils import isWhiteSpace, isPunctChar, isMdAsciiPunct +from ..token import Token if TYPE_CHECKING: from markdown_it import MarkdownIt -@attr.s(slots=True) +@dataclass(**DATACLASS_KWARGS) class Delimiter: # Char code of the starting marker (number). - marker: int = attr.ib() + marker: int # Total length of these series of delimiters. - length: int = attr.ib() + length: int # An amount of characters before this one that's equivalent to # current one. In plain English: if this delimiter does not open @@ -25,21 +28,21 @@ class Delimiter: # # Used to skip sequences like "*****" in one step, for 1st asterisk # value will be 0, for 2nd it's 1 and so on. - jump: int = attr.ib() + jump: int # A position of the token this delimiter corresponds to. - token: int = attr.ib() + token: int # If this delimiter is matched as a valid opener, `end` will be # equal to its position, otherwise it's `-1`. - end: int = attr.ib() + end: int # Boolean flags that determine if this delimiter could open or close # an emphasis. - open: bool = attr.ib() - close: bool = attr.ib() + open: bool + close: bool - level: bool = attr.ib(default=None) + level: bool | None = None Scanned = namedtuple("Scanned", ["can_open", "can_close", "length"]) @@ -47,13 +50,13 @@ class Delimiter: class StateInline(StateBase): def __init__( - self, src: str, md: "MarkdownIt", env: MutableMapping, outTokens: List[Token] + self, src: str, md: MarkdownIt, env: MutableMapping, outTokens: list[Token] ): self.src = src self.env = env self.md = md self.tokens = outTokens - self.tokens_meta: List[Optional[dict]] = [None] * len(outTokens) + self.tokens_meta: list[dict | None] = [None] * len(outTokens) self.pos = 0 self.posMax = len(self.src) @@ -63,16 +66,16 @@ def __init__( # Stores { start: end } pairs. Useful for backtrack # optimization of pairs parse (emphasis, strikes). - self.cache: Dict[int, int] = {} + self.cache: dict[int, int] = {} # List of emphasis-like delimiters for current tag - self.delimiters: List[Delimiter] = [] + self.delimiters: list[Delimiter] = [] # Stack of delimiter lists for upper level tags - self._prev_delimiters: List[List[Delimiter]] = [] + self._prev_delimiters: list[list[Delimiter]] = [] # backticklength => last seen position - self.backticks: Dict[int, int] = {} + self.backticks: dict[int, int] = {} self.backticksScanned = False def __repr__(self): diff --git a/markdown_it/rules_inline/strikethrough.py b/markdown_it/rules_inline/strikethrough.py index 87af4b46..107ea26b 100644 --- a/markdown_it/rules_inline/strikethrough.py +++ b/markdown_it/rules_inline/strikethrough.py @@ -1,6 +1,7 @@ # ~~strike through~~ -from typing import List -from .state_inline import StateInline, Delimiter +from __future__ import annotations + +from .state_inline import Delimiter, StateInline def tokenize(state: StateInline, silent: bool): @@ -51,7 +52,7 @@ def tokenize(state: StateInline, silent: bool): return True -def _postProcess(state: StateInline, delimiters: List[Delimiter]): +def _postProcess(state: StateInline, delimiters: list[Delimiter]): loneMarkers = [] maximum = len(delimiters) @@ -93,7 +94,7 @@ def _postProcess(state: StateInline, delimiters: List[Delimiter]): i += 1 - # If a marker sequence has an odd number of characters, it's splitted + # If a marker sequence has an odd number of characters, it's split # like this: `~~~~~` -> `~` + `~~` + `~~`, leaving one marker at the # start of the sequence. # diff --git a/markdown_it/rules_inline/text.py b/markdown_it/rules_inline/text.py index f36f069a..ec6ee0fa 100644 --- a/markdown_it/rules_inline/text.py +++ b/markdown_it/rules_inline/text.py @@ -3,14 +3,15 @@ from .state_inline import StateInline - # Rule to skip pure text -# '{}$%@~+=:' reserved for extentions +# '{}$%@~+=:' reserved for extensions # !, ", #, $, %, &, ', (, ), *, +, ,, -, ., /, :, ;, <, =, >, ?, @, [, \, ], ^, _, `, {, |, }, or ~ # !!!! Don't confuse with "Markdown ASCII Punctuation" chars # http://spec.commonmark.org/0.15/#ascii-punctuation-character + + def isTerminatorChar(ch): return ch in { 0x0A, # /* \n */: diff --git a/markdown_it/token.py b/markdown_it/token.py index 36646e07..b20875b6 100644 --- a/markdown_it/token.py +++ b/markdown_it/token.py @@ -1,17 +1,11 @@ -from typing import ( - Any, - Callable, - Dict, - List, - MutableMapping, - Optional, - Tuple, - Type, - Union, -) +from __future__ import annotations + +from collections.abc import Callable, MutableMapping +import dataclasses as dc +from typing import Any import warnings -import attr +from markdown_it._compat import DATACLASS_KWARGS def convert_attrs(value: Any) -> Any: @@ -26,44 +20,65 @@ def convert_attrs(value: Any) -> Any: return value -@attr.s(slots=True) +@dc.dataclass(**DATACLASS_KWARGS) class Token: - # Type of the token (string, e.g. "paragraph_open") - type: str = attr.ib() - # html tag name, e.g. "p" - tag: str = attr.ib() - # Level change (number in {-1, 0, 1} set), where: - # - `1` means the tag is opening - # - `0` means the tag is self-closing - # - `-1` means the tag is closing - nesting: int = attr.ib() - # Html attributes. Note this differs from the upstream "list of lists" format - attrs: Dict[str, Union[str, int, float]] = attr.ib( - factory=dict, converter=convert_attrs - ) - # Source map info. Format: `[ line_begin, line_end ]` - map: Optional[List[int]] = attr.ib(default=None) - # nesting level, the same as `state.level` - level: int = attr.ib(default=0) - # An array of child nodes (inline and img tokens) - children: Optional[List["Token"]] = attr.ib(default=None) - # In a case of self-closing tag (code, html, fence, etc.), - # it has contents of this tag. - content: str = attr.ib(default="") - # '*' or '_' for emphasis, fence string for fence, etc. - markup: str = attr.ib(default="") - # Additional information: - # - Info string for "fence" tokens - # - The value "auto" for autolink "link_open" and "link_close" tokens - info: str = attr.ib(default="") - # A place for plugins to store any arbitrary data - meta: dict = attr.ib(factory=dict) - # True for block-level tokens, false for inline tokens. - # Used in renderer to calculate line breaks - block: bool = attr.ib(default=False) - # If it's true, ignore this element when rendering. - # Used for tight lists to hide paragraphs. - hidden: bool = attr.ib(default=False) + + type: str + """Type of the token (string, e.g. "paragraph_open")""" + + tag: str + """HTML tag name, e.g. 'p'""" + + nesting: int + """Level change (number in {-1, 0, 1} set), where: + - `1` means the tag is opening + - `0` means the tag is self-closing + - `-1` means the tag is closing + """ + + attrs: dict[str, str | int | float] = dc.field(default_factory=dict) + """HTML attributes. + Note this differs from the upstream "list of lists" format, + although than an instance can still be initialised with this format. + """ + + map: list[int] | None = None + """Source map info. Format: `[ line_begin, line_end ]`""" + + level: int = 0 + """Nesting level, the same as `state.level`""" + + children: list[Token] | None = None + """Array of child nodes (inline and img tokens).""" + + content: str = "" + """Inner content, in the case of a self-closing tag (code, html, fence, etc.),""" + + markup: str = "" + """'*' or '_' for emphasis, fence string for fence, etc.""" + + info: str = "" + """Additional information: + - Info string for "fence" tokens + - The value "auto" for autolink "link_open" and "link_close" tokens + - The string value of the item marker for ordered-list "list_item_open" tokens + """ + + meta: dict = dc.field(default_factory=dict) + """A place for plugins to store any arbitrary data""" + + block: bool = False + """True for block-level tokens, false for inline tokens. + Used in renderer to calculate line breaks + """ + + hidden: bool = False + """If true, ignore this element when rendering. + Used for tight lists to hide paragraphs. + """ + + def __post_init__(self): + self.attrs = convert_attrs(self.attrs) def attrIndex(self, name: str) -> int: warnings.warn( @@ -74,20 +89,20 @@ def attrIndex(self, name: str) -> int: return -1 return list(self.attrs.keys()).index(name) - def attrItems(self) -> List[Tuple[str, Union[str, int, float]]]: + def attrItems(self) -> list[tuple[str, str | int | float]]: """Get (key, value) list of attrs.""" return list(self.attrs.items()) - def attrPush(self, attrData: Tuple[str, Union[str, int, float]]) -> None: + def attrPush(self, attrData: tuple[str, str | int | float]) -> None: """Add `[ name, value ]` attribute to list. Init attrs if necessary.""" name, value = attrData self.attrSet(name, value) - def attrSet(self, name: str, value: Union[str, int, float]) -> None: + def attrSet(self, name: str, value: str | int | float) -> None: """Set `name` attribute to `value`. Override old value if exists.""" self.attrs[name] = value - def attrGet(self, name: str) -> Union[None, str, int, float]: + def attrGet(self, name: str) -> None | str | int | float: """Get the value of attribute `name`, or null if it does not exist.""" return self.attrs.get(name, None) @@ -106,18 +121,18 @@ def attrJoin(self, name: str, value: str) -> None: else: self.attrs[name] = value - def copy(self) -> "Token": + def copy(self, **changes: Any) -> Token: """Return a shallow copy of the instance.""" - return attr.evolve(self) + return dc.replace(self, **changes) def as_dict( self, *, children: bool = True, as_upstream: bool = True, - meta_serializer: Optional[Callable[[dict], Any]] = None, - filter: Optional[Callable[[attr.Attribute, Any], bool]] = None, - dict_factory: Type[MutableMapping[str, Any]] = dict, + meta_serializer: Callable[[dict], Any] | None = None, + filter: Callable[[str, Any], bool] | None = None, + dict_factory: Callable[..., MutableMapping[str, Any]] = dict, ) -> MutableMapping[str, Any]: """Return the token as a dictionary. @@ -127,16 +142,15 @@ def as_dict( :param meta_serializer: hook for serializing ``Token.meta`` :param filter: A callable whose return code determines whether an attribute or element is included (``True``) or dropped (``False``). - Is called with the `attr.Attribute` as the first argument and the - value as the second argument. + Is called with the (key, value) pair. :param dict_factory: A callable to produce dictionaries from. For example, to produce ordered dictionaries instead of normal Python dictionaries, pass in ``collections.OrderedDict``. """ - mapping = attr.asdict( - self, recurse=False, filter=filter, dict_factory=dict_factory - ) + mapping = dict_factory((f.name, getattr(self, f.name)) for f in dc.fields(self)) + if filter: + mapping = dict_factory((k, v) for k, v in mapping.items() if filter(k, v)) if as_upstream and "attrs" in mapping: mapping["attrs"] = ( None @@ -159,73 +173,9 @@ def as_dict( return mapping @classmethod - def from_dict(cls, dct: MutableMapping[str, Any]) -> "Token": + def from_dict(cls, dct: MutableMapping[str, Any]) -> Token: """Convert a dict to a Token.""" token = cls(**dct) if token.children: token.children = [cls.from_dict(c) for c in token.children] # type: ignore[arg-type] return token - - -@attr.s(slots=True) -class NestedTokens: - """A class that closely resembles a Token, - but for a an opening/closing Token pair, and their containing children. - """ - - opening: Token = attr.ib() - closing: Token = attr.ib() - children: List[Union[Token, "NestedTokens"]] = attr.ib(factory=list) - - def __getattr__(self, name): - return getattr(self.opening, name) - - def attrGet(self, name: str) -> Union[None, str, int, float]: - """ Get the value of attribute `name`, or null if it does not exist.""" - return self.opening.attrGet(name) - - -def nest_tokens(tokens: List[Token]) -> List[Union[Token, NestedTokens]]: - """Convert the token stream to a list of tokens and nested tokens. - - ``NestedTokens`` contain the open and close tokens and a list of children - of all tokens in between (recursively nested) - """ - warnings.warn( - "`markdown_it.token.nest_tokens` and `markdown_it.token.NestedTokens`" - " are deprecated. Please migrate to `markdown_it.tree.SyntaxTreeNode`", - DeprecationWarning, - ) - - output: List[Union[Token, NestedTokens]] = [] - - tokens = list(reversed(tokens)) - while tokens: - token = tokens.pop() - - if token.nesting == 0: - token = token.copy() - output.append(token) - if token.children: - # Ignore type checkers because `nest_tokens` doesn't respect - # typing of `Token.children`. We add `NestedTokens` into a - # `List[Token]` here. - token.children = nest_tokens(token.children) # type: ignore - continue - - assert token.nesting == 1, token.nesting - - nested_tokens = [token] - nesting = 1 - while tokens and nesting != 0: - token = tokens.pop() - nested_tokens.append(token) - nesting += token.nesting - if nesting != 0: - raise ValueError(f"unclosed tokens starting {nested_tokens[0]}") - - child = NestedTokens(nested_tokens[0], nested_tokens[-1]) - output.append(child) - child.children = nest_tokens(nested_tokens[1:-1]) - - return output diff --git a/markdown_it/tree.py b/markdown_it/tree.py index edbc35e0..09476b22 100644 --- a/markdown_it/tree.py +++ b/markdown_it/tree.py @@ -2,20 +2,11 @@ This module is not part of upstream JavaScript markdown-it. """ +from __future__ import annotations + +from collections.abc import Generator, Sequence import textwrap -from typing import ( - Generator, - NamedTuple, - Sequence, - Tuple, - Dict, - List, - Optional, - Any, - TypeVar, - overload, - Union, -) +from typing import Any, NamedTuple, TypeVar, overload from .token import Token from .utils import _removesuffix @@ -50,10 +41,10 @@ def __init__( If `create_root` is True, create a root node for the document. """ # Only nodes representing an unnested token have self.token - self.token: Optional[Token] = None + self.token: Token | None = None # Only containers have nester tokens - self.nester_tokens: Optional[_NesterTokens] = None + self.nester_tokens: _NesterTokens | None = None # Root node does not have self.parent self._parent: Any = None @@ -92,18 +83,16 @@ def __getitem__(self: _NodeType, item: int) -> _NodeType: ... @overload - def __getitem__(self: _NodeType, item: slice) -> List[_NodeType]: + def __getitem__(self: _NodeType, item: slice) -> list[_NodeType]: ... - def __getitem__( - self: _NodeType, item: Union[int, slice] - ) -> Union[_NodeType, List[_NodeType]]: + def __getitem__(self: _NodeType, item: int | slice) -> _NodeType | list[_NodeType]: return self.children[item] - def to_tokens(self: _NodeType) -> List[Token]: + def to_tokens(self: _NodeType) -> list[Token]: """Recover the linear token stream.""" - def recursive_collect_tokens(node: _NodeType, token_list: List[Token]) -> None: + def recursive_collect_tokens(node: _NodeType, token_list: list[Token]) -> None: if node.type == "root": for child in node.children: recursive_collect_tokens(child, token_list) @@ -116,24 +105,24 @@ def recursive_collect_tokens(node: _NodeType, token_list: List[Token]) -> None: recursive_collect_tokens(child, token_list) token_list.append(node.nester_tokens.closing) - tokens: List[Token] = [] + tokens: list[Token] = [] recursive_collect_tokens(self, tokens) return tokens @property - def children(self: _NodeType) -> List[_NodeType]: + def children(self: _NodeType) -> list[_NodeType]: return self._children @children.setter - def children(self: _NodeType, value: List[_NodeType]) -> None: + def children(self: _NodeType, value: list[_NodeType]) -> None: self._children = value @property - def parent(self: _NodeType) -> Optional[_NodeType]: + def parent(self: _NodeType) -> _NodeType | None: return self._parent @parent.setter - def parent(self: _NodeType, value: Optional[_NodeType]) -> None: + def parent(self: _NodeType, value: _NodeType | None) -> None: self._parent = value @property @@ -178,7 +167,7 @@ def type(self) -> str: return _removesuffix(self.nester_tokens.opening.type, "_open") @property - def next_sibling(self: _NodeType) -> Optional[_NodeType]: + def next_sibling(self: _NodeType) -> _NodeType | None: """Get the next node in the sequence of siblings. Returns `None` if this is the last sibling. @@ -189,7 +178,7 @@ def next_sibling(self: _NodeType) -> Optional[_NodeType]: return None @property - def previous_sibling(self: _NodeType) -> Optional[_NodeType]: + def previous_sibling(self: _NodeType) -> _NodeType | None: """Get the previous node in the sequence of siblings. Returns `None` if this is the first sibling. @@ -282,21 +271,21 @@ def _attribute_token(self) -> Token: @property def tag(self) -> str: - """html tag name, e.g. \"p\"""" + """html tag name, e.g. \"p\" """ return self._attribute_token().tag @property - def attrs(self) -> Dict[str, Union[str, int, float]]: + def attrs(self) -> dict[str, str | int | float]: """Html attributes.""" return self._attribute_token().attrs - def attrGet(self, name: str) -> Union[None, str, int, float]: + def attrGet(self, name: str) -> None | str | int | float: """Get the value of attribute `name`, or null if it does not exist.""" return self._attribute_token().attrGet(name) @property - def map(self) -> Optional[Tuple[int, int]]: - """Source map info. Format: `Tuple[ line_begin, line_end ]`""" + def map(self) -> tuple[int, int] | None: + """Source map info. Format: `tuple[ line_begin, line_end ]`""" map_ = self._attribute_token().map if map_: # Type ignore because `Token`s attribute types are not perfect diff --git a/markdown_it/utils.py b/markdown_it/utils.py index 5d1ce723..2ba2995a 100644 --- a/markdown_it/utils.py +++ b/markdown_it/utils.py @@ -1,5 +1,7 @@ +from __future__ import annotations + +from collections.abc import Callable from pathlib import Path -from typing import Callable, List, Optional, Union class OptionsDict(dict): @@ -78,16 +80,16 @@ def langPrefix(self, value: str): self["langPrefix"] = value @property - def highlight(self) -> Optional[Callable[[str, str, str], str]]: + def highlight(self) -> Callable[[str, str, str], str] | None: """Highlighter function: (content, langName, langAttrs) -> escaped HTML.""" return self["highlight"] @highlight.setter - def highlight(self, value: Optional[Callable[[str, str, str], str]]): + def highlight(self, value: Callable[[str, str, str], str] | None): self["highlight"] = value -def read_fixture_file(path: Union[str, Path]) -> List[list]: +def read_fixture_file(path: str | Path) -> list[list]: text = Path(path).read_text(encoding="utf-8") tests = [] section = 0 diff --git a/profiler.py b/profiler.py new file mode 100644 index 00000000..414a7727 --- /dev/null +++ b/profiler.py @@ -0,0 +1,19 @@ +"""A script for profiling. + +To generate and read results: + - `tox -e profile` + - `firefox .tox/prof/output.svg` +""" +from pathlib import Path + +from markdown_it import MarkdownIt + +commonmark_spec = ( + (Path(__file__).parent / "tests" / "test_cmark_spec" / "spec.md") + .read_bytes() + .decode() +) + +# Run this a few times to emphasize over imports and other overhead above +for _ in range(10): + MarkdownIt().render(commonmark_spec) diff --git a/pyproject.toml b/pyproject.toml index f7e22d4a..e0017185 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,104 @@ [build-system] -requires = ["setuptools>=46.4.0", "wheel"] -build-backend = "setuptools.build_meta" +requires = ["flit_core >=3.4,<4"] +build-backend = "flit_core.buildapi" + +[project] +name = "markdown-it-py" +dynamic = ["version"] +description = "Python port of markdown-it. Markdown parsing, done right!" +readme = "README.md" +authors = [{name = "Chris Sewell", email = "chrisj_sewell@hotmail.com"}] +license = {file = "LICENSE"} +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", + "Topic :: Software Development :: Libraries :: Python Modules", + "Topic :: Text Processing :: Markup", +] +keywords = ["markdown", "lexer", "parser", "commonmark", "markdown-it"] +requires-python = ">=3.7" +dependencies = [ + "mdurl~=0.1", + "typing_extensions>=3.7.4;python_version<'3.8'", +] + +[project.urls] +Homepage = "https://github.com/executablebooks/markdown-it-py" +Documentation = "https://markdown-it-py.readthedocs.io/" + +[project.optional-dependencies] +code_style = ["pre-commit==2.6"] +compare = [ + "commonmark~=0.9.1", + "markdown~=3.3.6", + "mistletoe~=0.8.1", + "mistune~=2.0.2", + "panflute~=2.1.3", +] +linkify = ["linkify-it-py~=1.0"] +plugins = ["mdit-py-plugins"] +rtd = [ + "attrs", + "myst-parser", + "pyyaml", + "sphinx", + "sphinx-copybutton", + "sphinx-design", + "sphinx_book_theme", +] +testing = [ + "coverage", + "pytest", + "pytest-cov", + "pytest-regressions", +] +benchmarking = [ + "psutil", + "pytest", + "pytest-benchmark~=3.2", +] +profiling = ["gprof2dot"] + +[project.scripts] +markdown-it = "markdown_it.cli.parse:main" + +[tool.flit.module] +name = "markdown_it" + +[tool.flit.sdist] +exclude = [ + "docs/", + "tests/", + "benchmarking/" +] + +[tool.isort] +profile = "black" +force_sort_within_sections = true + +[tool.mypy] +show_error_codes = true +warn_unused_ignores = true +warn_redundant_casts = true +no_implicit_optional = true +strict_equality = true +implicit_reexport = false + +[[tool.mypy.overrides]] +module = ["tests.test_plugins.*", "markdown.*"] +ignore_errors = true + +[[tool.mypy.overrides]] +module = ["markdown.*"] +ignore_missing_imports = true [tool.pytest.ini_options] xfail_strict = true diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 2815e3eb..00000000 --- a/setup.cfg +++ /dev/null @@ -1,92 +0,0 @@ -[metadata] -name = markdown-it-py -version = attr: markdown_it.__version__ -description = Python port of markdown-it. Markdown parsing, done right! -long_description = file: README.md -long_description_content_type = text/markdown -url = https://github.com/executablebooks/markdown-it-py -author = Chris Sewell -author_email = chrisj_sewell@hotmail.com -license = MIT -license_file = LICENSE -classifiers = - Development Status :: 5 - Production/Stable - Intended Audience :: Developers - License :: OSI Approved :: MIT License - Programming Language :: Python :: 3 - Programming Language :: Python :: 3.6 - Programming Language :: Python :: 3.7 - Programming Language :: Python :: 3.8 - Programming Language :: Python :: 3.9 - Programming Language :: Python :: 3.10 - Programming Language :: Python :: Implementation :: CPython - Programming Language :: Python :: Implementation :: PyPy - Topic :: Software Development :: Libraries :: Python Modules - Topic :: Text Processing :: Markup -keywords = markdown lexer parser development -project_urls = - Documentation=https://markdown-it-py.readthedocs.io - -[options] -packages = find: -install_requires = - attrs>=19,<22 - mdurl~=0.1 - typing_extensions>=3.7.4;python_version<'3.8' -python_requires = ~=3.6 -include_package_data = True -zip_safe = False - -[options.entry_points] -console_scripts = - markdown-it = markdown_it.cli.parse:main - -[options.extras_require] -code_style = - pre-commit==2.6 -compare = - commonmark~=0.9.1 - markdown~=3.2.2 - mistletoe-ebp~=0.10.0 - mistune~=0.8.4 - panflute~=1.12 -linkify = - linkify-it-py~=1.0 -plugins = - mdit-py-plugins -rtd = - myst-nb==0.13.0a1 - pyyaml - sphinx>=2,<4 - sphinx-copybutton - sphinx-panels~=0.4.0 - sphinx_book_theme -testing = - coverage - pytest - pytest-cov - pytest-regressions -benchmarking = - psutil - pytest - pytest-benchmark~=3.2 - -[options.packages.find] -exclude = - test* - benchmarking - -[mypy] -show_error_codes = True -warn_unused_ignores = True -warn_redundant_casts = True -no_implicit_optional = True -strict_equality = True -implicit_reexport = False - -[mypy-tests.test_plugins.*] -ignore_errors = True - -[flake8] -max-line-length = 100 -extend-ignore = E203 diff --git a/setup.py b/setup.py deleted file mode 100644 index 36141267..00000000 --- a/setup.py +++ /dev/null @@ -1,6 +0,0 @@ -# This file is needed for editable installs (`pip install -e .`). -# Can be removed once the following is resolved -# https://github.com/pypa/packaging-problems/issues/256 -from setuptools import setup - -setup() diff --git a/tests/test_api/test_token.py b/tests/test_api/test_token.py index df4a0390..e3806b50 100644 --- a/tests/test_api/test_token.py +++ b/tests/test_api/test_token.py @@ -1,6 +1,6 @@ import warnings -from markdown_it.token import Token, nest_tokens, NestedTokens +from markdown_it.token import Token def test_token(): @@ -36,33 +36,3 @@ def test_token(): def test_serialization(): token = Token("name", "tag", 0, children=[Token("other", "tag2", 0)]) assert token == Token.from_dict(token.as_dict()) - - -def test_nest_tokens(): - tokens = nest_tokens( - [ - Token("start", "", 0), - Token("open", "", 1), - Token("open_inner", "", 1), - Token("inner", "", 0), - Token("close_inner", "", -1), - Token("close", "", -1), - Token("end", "", 0), - ] - ) - assert [t.type for t in tokens] == ["start", "open", "end"] - assert isinstance(tokens[0], Token) - assert isinstance(tokens[1], NestedTokens) - assert isinstance(tokens[2], Token) - - nested = tokens[1] - assert nested.opening.type == "open" - assert nested.closing.type == "close" - assert len(nested.children) == 1 - assert nested.children[0].type == "open_inner" - - nested2 = nested.children[0] - assert nested2.opening.type == "open_inner" - assert nested2.closing.type == "close_inner" - assert len(nested2.children) == 1 - assert nested2.children[0].type == "inner" diff --git a/tests/test_cmark_spec/spec.md b/tests/test_cmark_spec/spec.md index e6f31375..2d79f7b7 100644 --- a/tests/test_cmark_spec/spec.md +++ b/tests/test_cmark_spec/spec.md @@ -4533,7 +4533,7 @@ inside the code block: Note that rules #1 and #2 only apply to two cases: (a) cases in which the lines to be included in a list item begin with a -characer other than a space or tab, and (b) cases in which +character other than a space or tab, and (b) cases in which they begin with an indented code block. In a case like the following, where the first block begins with three spaces of indentation, the rules do not allow us to form a list item by diff --git a/tests/test_cmark_spec/test_spec/test_file.html b/tests/test_cmark_spec/test_spec/test_file.html index 9f73ce6c..1c2dc3cb 100644 --- a/tests/test_cmark_spec/test_spec/test_file.html +++ b/tests/test_cmark_spec/test_spec/test_file.html @@ -3418,7 +3418,7 @@

List items

Note that rules #1 and #2 only apply to two cases: (a) cases in which the lines to be included in a list item begin with a -characer other than a space or tab, and (b) cases in which +character other than a space or tab, and (b) cases in which they begin with an indented code block. In a case like the following, where the first block begins with three spaces of indentation, the rules do not allow us to form a list item by diff --git a/tests/test_port/fixtures/commonmark_extras.md b/tests/test_port/fixtures/commonmark_extras.md index ca14db31..168b039d 100644 --- a/tests/test_port/fixtures/commonmark_extras.md +++ b/tests/test_port/fixtures/commonmark_extras.md @@ -663,3 +663,34 @@ Issue #772. Header rule should not interfere with html tags. == . + +Issue #205. Space in link destination generates IndexError +. +[Contact](http:// mail.com) + +[Contact](mailto: mail@mail.com) +. +

[Contact](http:// mail.com)

+

[Contact](mailto: mail@mail.com)

+. + +Issue #204. Combination of blockquotes, list and newlines causes an IndexError +. +> QUOTE ++ UNORDERED LIST ITEM + > INDENTED QUOTE + + + +. +
+

QUOTE

+
+
    +
  • UNORDERED LIST ITEM +
    +

    INDENTED QUOTE

    +
    +
  • +
+. diff --git a/tests/test_port/fixtures/fatal.md b/tests/test_port/fixtures/fatal.md index dfeeb2e7..7b2afcfc 100644 --- a/tests/test_port/fixtures/fatal.md +++ b/tests/test_port/fixtures/fatal.md @@ -1,4 +1,4 @@ -Should not throw exception on invalid chars in URL (`*` not allowed in path) [mailformed URI] +Should not throw exception on invalid chars in URL (`*` not allowed in path) [malformed URI] . [foo](<%test>) . @@ -6,7 +6,7 @@ Should not throw exception on invalid chars in URL (`*` not allowed in path) [ma . -Should not throw exception on broken utf-8 sequence in URL [mailformed URI] +Should not throw exception on broken utf-8 sequence in URL [malformed URI] . [foo](%C3) . @@ -14,7 +14,7 @@ Should not throw exception on broken utf-8 sequence in URL [mailformed URI] . -Should not throw exception on broken utf-16 surrogates sequence in URL [mailformed URI] +Should not throw exception on broken utf-16 surrogates sequence in URL [malformed URI] . [foo](�) . diff --git a/tests/test_port/test_misc.py b/tests/test_port/test_misc.py index f5f821e9..62b5bf85 100644 --- a/tests/test_port/test_misc.py +++ b/tests/test_port/test_misc.py @@ -1,5 +1,4 @@ -from markdown_it import MarkdownIt -from markdown_it import presets +from markdown_it import MarkdownIt, presets def test_highlight_arguments(): @@ -12,3 +11,34 @@ def highlight_func(str_, lang, attrs): conf["options"]["highlight"] = highlight_func md = MarkdownIt(config=conf) assert md.render("``` a b c d \nhl\n```") == "
==hl\n==
\n" + + +def test_ordered_list_info(): + def type_filter(tokens, type_): + return [t for t in tokens if t.type == type_] + + md = MarkdownIt() + + tokens = md.parse("1. Foo\n2. Bar\n20. Fuzz") + assert len(type_filter(tokens, "ordered_list_open")) == 1 + tokens = type_filter(tokens, "list_item_open") + assert len(tokens) == 3 + assert tokens[0].info == "1" + assert tokens[0].markup == "." + assert tokens[1].info == "2" + assert tokens[1].markup == "." + assert tokens[2].info == "20" + assert tokens[2].markup == "." + + tokens = md.parse(" 1. Foo\n2. Bar\n 20. Fuzz\n 199. Flp") + assert len(type_filter(tokens, "ordered_list_open")) == 1 + tokens = type_filter(tokens, "list_item_open") + assert len(tokens) == 4 + assert tokens[0].info == "1" + assert tokens[0].markup == "." + assert tokens[1].info == "2" + assert tokens[1].markup == "." + assert tokens[2].info == "20" + assert tokens[2].markup == "." + assert tokens[3].info == "199" + assert tokens[3].markup == "." diff --git a/tox.ini b/tox.ini index 158faff1..f4e117e2 100644 --- a/tox.ini +++ b/tox.ini @@ -9,13 +9,13 @@ envlist = py37 [testenv] usedevelop = true -[testenv:py{36,37,38,39,310}] +[testenv:py{37,38,39,310}] extras = linkify testing commands = pytest {posargs:tests/} -[testenv:py{36,37,38,39,310}-plugins] +[testenv:py{37,38,39,310}-plugins] extras = testing changedir = {envtmpdir} allowlist_externals = @@ -27,19 +27,39 @@ commands_pre = commands = pytest {posargs} -[testenv:py{36,37,38,39}-bench-core] +[testenv:py{37,38,39}-bench-core] extras = benchmarking commands = pytest benchmarking/bench_core.py {posargs} -[testenv:py{36,37,38}-bench-packages] +[testenv:py{37,38}-bench-packages] extras = benchmarking,compare commands = pytest benchmarking/bench_packages.py {posargs} [testenv:docs-{update,clean}] extras = linkify,plugins,rtd -whitelist_externals = rm +whitelist_externals = + echo + rm setenv = update: SKIP_APIDOC = true commands = clean: rm -rf docs/_build sphinx-build -nW --keep-going -b {posargs:html} docs/ docs/_build/{posargs:html} +commands_post = echo "open file://{toxinidir}/docs/_build/{posargs:html}/index.html" + +[testenv:profile] +description = run profiler (use e.g. `firefox .tox/prof/output.svg` to open) +extras = profiling +allowlist_externals = + mkdir + dot +commands = + mkdir -p "{toxworkdir}/prof" + python -m cProfile -o "{toxworkdir}/prof/output.pstats" profiler.py + gprof2dot -f pstats -o "{toxworkdir}/prof/output.dot" "{toxworkdir}/prof/output.pstats" + dot -Tsvg -o "{toxworkdir}/prof/output.svg" "{toxworkdir}/prof/output.dot" + python -c 'import pathlib; print("profiler svg output under file://\{0\}".format(pathlib.Path(r"{toxworkdir}") / "prof" / "output.svg"))' + +[flake8] +max-line-length = 100 +extend-ignore = E203