diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
new file mode 100644
index 00000000..62104894
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,27 @@
+---
+name: Bug report
+about: Report a parsing error, unexpected output, or other bugs
+title: ''
+labels: bug
+assignees: ''
+
+---
+
+**Describe the bug**
+A clear and concise description of what the bug is.
+
+**To Reproduce**
+Attach a minimal markdown snippet that causes the bug to occur. This should be placed inside a fenced code block to escape GitHub's formatting.
+
+If your snippet contains fenced code blocks then you can escape them by adding more backticks to the enclosing block. See [this GitHub article](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/creating-and-highlighting-code-blocks#fenced-code-blocks) for an example.
+
+**Expected behavior**
+A clear and concise description of what you expected to happen.
+
+**Debug info**
+Version of library being used:
+
+Any extras being used:
+
+**Additional context**
+Add any other context about the problem here.
diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml
new file mode 100644
index 00000000..e642ccdd
--- /dev/null
+++ b/.github/workflows/python.yaml
@@ -0,0 +1,32 @@
+name: PythonCI
+on:
+ push:
+ branches: [ master ]
+ pull_request:
+ branches: [ master ]
+jobs:
+ build:
+ runs-on: ${{ matrix.os }}
+ strategy:
+ matrix:
+ python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
+ os:
+ - ubuntu-latest
+ - macos-latest
+ - windows-latest
+ steps:
+ - uses: actions/checkout@v4
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install .[all]
+ - name: Test
+ run: |
+ make testone
+ - name: Test ReDoS
+ run: |
+ make testredos
diff --git a/.gitignore b/.gitignore
index 722351f9..0b671943 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,3 +12,5 @@ sandbox/*.html
__pycache__
.tox
*.egg-info
+*.idea
+venv
\ No newline at end of file
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 93208f8f..00000000
--- a/.travis.yml
+++ /dev/null
@@ -1,12 +0,0 @@
-language: python
-python:
- - "2.7"
- - "pypy"
- - "3.4"
- - "3.5"
- - "3.6"
- - "3.7-dev"
-# command to install dependencies
-install: pip install Pygments>=2.1.1
-# command to run tests
-script: make testone
diff --git a/CHANGES.md b/CHANGES.md
index 9b27c624..60c03486 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,8 +1,217 @@
# python-markdown2 Changelog
-## python-markdown2 2.3.9 (not yet released)
+## python-markdown2 2.5.5 (not yet released)
-(nothing yet)
+- [pull #639] Fix middle-word-em interfering with strongs (#637)
+- [pull #640] Fix code friendly extra stopping other syntax being processed (#638)
+- [pull #644] Fix a number of em/strong issues (#641, #642, #643)
+
+
+## python-markdown2 2.5.4
+
+- [pull #617] Add MarkdownFileLinks extra (#528)
+- [pull #622] Add missing block tags to regex (#620)
+- [pull #623] Don't escape plus signs in URLs (#621)
+- [pull #626] Fix XSS when encoding incomplete tags (#625)
+- [pull #628] Fix TypeError in MiddleWordEm extra when options was None (#627)
+- [pull #630] Fix nbsp breaking tables (#629)
+- [pull #634] Fix ReDoS in HTML tokenizer regex (#633)
+
+
+## python-markdown2 2.5.3
+
+- [pull #616] make tables without body gfm compatible
+
+
+## python-markdown2 2.5.2
+
+- [pull #605] Add support for Python 3.13, drop EOL 3.8
+- [pull #607] Fix `middle-word-em` extra preventing strongs from being recognized (#606)
+- [pull #609] Add option to output to file in CLI (#608)
+- [pull #612] Fix footnote labels appearing out-of-order (#536)
+- [pull #613] Fix smarty pants extra not triggering when it should (#611)
+
+
+## python-markdown2 2.5.1
+
+- [pull #590] Fix underscores within bold text getting emphasized (#589)
+- [pull #591] Add Alerts extra
+- [pull #595] Fix img alt text being processed as markdown (#594)
+- [pull #598] Add `link-shortrefs` extra (#597)
+- [pull #600] Use urandom for SECRET_SALT
+- [pull #602] Fix XSS issue in safe mode (#601)
+- [pull #604] Fix XSS injection in image URLs (#603)
+
+
+## python-markdown2 2.5.0
+
+- [pull #519] Add support for custom extras
+- [pull #519] Drop Python 3.5 support
+- [pull #568] Add `prepend` arg to toc extra (#397)
+- [pull #569] Process HTML comments as markdown in 'escape' safe mode
+- [pull #570] Fix syntax warnings in test suite
+- [pull #572] Process inline tags as HTML blocks when they span multiple lines (#571)
+- [pull #573] Add new LaTeX Extra
+- [pull #576] Fix `html`, `head` and `body` tags being wrapped in `<p>` tags (#575)
+- [pull #578] Graceful handling of broken lists when cuddled-lists extra is enabled
+- [pull #581] Add type hints (#562)
+- [pull #581] Drop Python 3.6 and 3.7 support
+- [pull #582] Fix fenced code blocks breaking lists (#580)
+- [pull #586] Fix #583 by tweaking incomplete tag regex
+- [pull #587] Fix AssertionError on malformed HTML (#584)
+
+
+## python-markdown2 2.4.13
+
+- [pull #559] Allow cuddled tables (#557)
+- [pull #560] Fix `markdown-in-html` not always splitting HTML tags into separate lines (#558)
+- [pull #564] Fix incomplete comments in safe mode not being escaped (#563)
+- [pull #566] Fix crash in `markdown-in-html` extra (#565)
+
+
+## python-markdown2 2.4.12
+
+- [pull #547] Update `markdown-in-html` extra to handle markdown on same line as HTML (#546)
+- [pull #550] Fix tables with trailing whitespace not being recognized (#549)
+- [pull #545] Fix multiple instances of strong emphasis (`**`) in one line (#541)
+- [pull #556] Fix incorrect parsing of links after square brackets (#552)
+
+## python-markdown2 2.4.11
+
+- [pull #524] Fix angles being escaped in style blocks (issue #523)
+- [pull #527] Fix base64 images being corrupted in safe mode (issue #526)
+- [pull #529] Add `breaks` extra with ability to hard break on backslashes (issue #525)
+- [pull #532] Fix #493 persisting when `code-friendly` extra enabled
+- [pull #535] Update `_slugify` to use utf-8 encoding (issue #534)
+- [pull #536] Maintain order of appearance in footnotes
+- [pull #538] Include HTML headers in TOC
+- [pull #540] Add mechanism to prevent header ID counter resetting (issue #530)
+
+## python-markdown2 2.4.10
+
+- [pull #520] Allow more relative links in safe mode (issue #517)
+- [pull #521] Always restore hashed HTML blocks (issue #185)
+- [pull #522] Add `middle-word-em` extra
+
+
+## python-markdown2 2.4.9
+
+- [pull #500] Add `` tag to html-classes extra
+- [pull #501] Fix link patterns extra matching against internal hashes
+- [pull #502] Replace deprecated `optparse` with `argparse`
+- [pull #506] Fix `_uniform_outdent` failing with empty strings (issue #505)
+- [pull #509] Fix HTML elements not unhashing correctly (issue #508)
+- [pull #511] Remove deprecated `imp` module (issue #510)
+- [pull #512] Allow link patterns to be passed via extras dict
+- [pull #513] Fix relative links not working in safe mode (issue #254)
+
+
+## python-markdown2 2.4.8
+
+- [pull #499] Fix images not being processed correctly (#498)
+
+
+## python-markdown2 2.4.7
+
+- [pull #483] Fix hashing nested HTML blocks
+- [pull #486] Fix backslash being unable to escape raw HTML tags
+- [pull #482] Add support for telegram spoiler in extras
+- [pull #485] mermaid support
+- [pull #487] Fix escaping ampersands in hrefs
+- [pull #490] Fix indented codeblocks inside fences (#489)
+- [pull #490] Remove `code-color` extra
+
+
+## python-markdown2 2.4.6
+
+- [pull #477] Feature wavedrom support
+- [pull #480] Fix mixing ordered and un-ordered lists combining into single list type
+
+
+## python-markdown2 2.4.5
+
+- [pull #466] Add optional dependencies to `setup.py`
+
+
+## python-markdown2 2.4.4
+
+- [pull #439] Fix TypeError if html-classes extra is None
+- [pull #441] Remove Python2 support
+- [pull #445] Replace `<strike>` with `<s>` in strike extra
+- [pull #446] Fix link patterns extra applying within links
+- [pull #443] create proper entry point
+- [pull #449] Codespans inside link text issue344
+- [pull #451] Underline and HTML comments
+- [pull #453] Links with brackets
+- [pull #454] Fix emacs local variable one-liners
+- [pull #457] Example of the current mixed-paragraph mode behavior in lists
+- [pull #455] Fix code block indentation in lists
+- [pull #434] Fix filter bypass leading to XSS (#362)
+- [pull #464] Fix html-classes extra not applying to code spans
+- [pull #462] Fix pygments block matching
+- [pull #462] Fix pyshell blocks in blockquotes
+- [pull #463] Fix multilevel lists
+- [pull #468] Remove `_uniform_outdent_limit` function
+- [pull #470] Add support for ordered lists that don't start at 1. (#469)
+- [pull #472] Fix `AssertionError` with lazy numbered lists (issue #471)
+- [pull #475] Add `
` and `` tags to html-classes extra (#352)
+- [pull #473] XSS test and fix
+
+
+## python-markdown2 2.4.3
+
+- [pull #413] Fix meta indentation
+- [pull #414] Fix code surrounded by blank lines inside blockquote fenced code blocks
+- [pull #417] Fix inline code pipe symbol within tables (issue #399)
+- [pull #418] Fix code block parsing error (issue #327)
+- [pull #419] Fix hr block created when not supposed to (issue #400)
+- [pull #421] Fix backslashes removed by adjacent code blocks (issues #369 and #412)
+- [pull #420] Fix md5-* in resulting HTML when several code blocks follow one by one (issue #355)
+- [pull #422] Fix excessive `<br>` tags in lists using break-on-newline extra (issue #394)
+- [pull #424] Standardize key and value definitions for metadata extra (issue #423)
+- [pull #427] Fix fenced code blocks breaking lists (issue #426)
+- [pull #429] Fix catastrophic backtracking (Regex DoS) in pyshell blocks.
+- [pull #431] Fix incorrect indentation of fenced code blocks within lists
+- [pull #436] RST admonitions
+- [pull #430] Improve error message if link_patterns forgotten
+- [pull #437] fix compatibility with pygments 2.12
+
+
+## python-markdown2 2.4.2
+
+- [pull #408] Fix for fenced code blocks issue #396
+- [pull #410] Be more strict on auto linking urls, RE DOS fix
+
+
+## python-markdown2 2.4.1
+
+- [pull #389] Tables extra: allow whitespace at the end of the underline row
+- [pull #392] Pyshell extra: enable syntax highlighting if `fenced-code-blocks` is loaded.
+- [pull #402] Regex DOS bandaid fix
+
+
+## python-markdown2 2.4.0
+
+- [pull #377] Fixed bug breaking strings elements in metadata lists
+- [pull #380] When rendering fenced code blocks, also add the `language-LANG` class
+- [pull #387] Regex DoS fixes
+
+
+## python-markdown2 2.3.10
+
+- [pull #356] Don't merge sequential quotes into a single blockquote
+- [pull #357] use style=text-align for table alignment
+- [pull #360] introduce underline extra
+- [pull #368] Support for structured and nested values in metadata
+- [pull #371] add noopener to external links
+
+
+## python-markdown2 2.3.9
+
+- [pull #335] Added header support for wiki tables
+- [pull #336] Reset _toc when convert is run
+- [pull #353] XSS fix
+- [pull #350] XSS fix
## python-markdown2 2.3.8
diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt
index be989934..0820b079 100644
--- a/CONTRIBUTORS.txt
+++ b/CONTRIBUTORS.txt
@@ -42,3 +42,23 @@ Sym Roe (github.com/symroe)
Alex Elzenaar (github.com/aelzenaar)
Francisco Saldaña (github.com/FrankSalad)
Shivam Kumar Jha (github.com/thealphadollar)
+ryanvilbrandt (github.com/ryanvilbrandt)
+Gareth Simpson (github.com/xurble)
+Kat Hagan (github.com/codebykat)
+Stɑrry Shivɑm (github.com/starry69)
+André Nasturas (github.com/andrenasturas)
+Denis Kasak (github.com/dkasak)
+Maximilian Hils (github.com/mhils)
+BarkeH (github.com/BarkeH)
+cav71 (github.com/cav71)
+Crozzers (github.com/Crozzers)
+Bastian Venthur (https://github.com/venthur), removed Python2 support
+gitbra (github.com/gitbra)
+Łukasz Langa (github.com/ambv)
+Max Omdal (github.com/momja)
+Kishore (github.com/jk6521)
+Ircama (github.com/Ircama)
+Ankit Mahato (github.com/animator)
+Eric Dufresne (github.com/edufresne)
+Lyra Rebane (github.com/rebane2001)
+Raul Bocanegra Algarra (github.com/raulbocanegra)
diff --git a/Makefile b/Makefile
index b6f88c9b..d2a9a72f 100644
--- a/Makefile
+++ b/Makefile
@@ -12,6 +12,10 @@ test:
testone:
cd test && python test.py -- -knownfailure
+.PHONY: testredos
+testredos:
+ python test/test_redos.py
+
.PHONY: pygments
pygments:
[[ -d deps/pygments ]] || ( \
diff --git a/README.md b/README.md
index 7c651636..f1e89c9f 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@ was written to closely match the behaviour of the original Perl-implemented
Markdown.pl. Markdown2 also comes with a number of extensions (called
"extras") for things like syntax coloring, tables, header-ids. See the
"Extra Syntax" section below. "markdown2" supports all Python versions
-2.6+ or 3.3+ (and pypy and jython, though I don't frequently test those).
+3.5+ (and pypy and jython, though I don't frequently test those).
There is another [Python
markdown.py](https://python-markdown.github.io/). However, at
@@ -28,14 +28,12 @@ your consideration.
Follow @trentmick
for updates to python-markdown2.
-Travis-ci.org test status: [](http://travis-ci.org/trentm/python-markdown2)
-
-
# Install
To install it in your Python installation run *one* of the following:
pip install markdown2
+ pip install markdown2[all] # to install all optional dependencies (eg: Pygments for code syntax highlighting)
pypm install markdown2 # if you use ActivePython (activestate.com/activepython)
easy_install markdown2 # if this is the best you have
python setup.py install
@@ -51,14 +49,14 @@ As a module:
```python
>>> import markdown2
>>> markdown2.markdown("*boo!*") # or use `html = markdown_path(PATH)`
-u'
`.
- text = self._strict_tag_block_re.sub(hash_html_block_sub, text)
+ text = self._strict_tag_block_sub(text, self._block_tags_a, hash_html_block_sub)
# Now match more liberally, simply from `\n` to `\n`
text = self._liberal_tag_block_re.sub(hash_html_block_sub, text)
+ # now do the same for spans that are acting like blocks
+ # eg: an anchor split over multiple lines for readability
+ text = self._strict_tag_block_sub(
+ text, self._span_tags,
+ # inline elements can't contain block level elements, so only span gamut is required
+ lambda t: hash_html_block_sub(self._run_span_gamut(t))
+ )
+
# Special case just for . It was easier to make a special
# case than to make the other regex more complicated.
if " str:
+ '''
+ Finds and substitutes HTML blocks within blocks of text
+
+ Args:
+ text: the text to search
+ html_tags_re: a regex pattern of HTML block tags to match against.
+ For example, `Markdown._block_tags_a`
+ callback: callback function that receives the found HTML text block and returns a new str
+ allow_indent: allow matching HTML blocks that are not completely outdented
+ '''
+ tag_count = 0
+ current_tag = html_tags_re
+ block = ''
+ result = ''
+
+ for chunk in text.splitlines(True):
+ is_markup = re.match(
+ r'^(\s{{0,{}}})(?:(?=
))?(?({})\b>?)'.format('' if allow_indent else '0', current_tag), chunk
+ )
+ block += chunk
+
+ if is_markup:
+ if chunk.startswith('%s' % is_markup.group(1)):
+ tag_count -= 1
+ else:
+ # if close tag is in same line
+ if self._tag_is_closed(is_markup.group(3), chunk):
+ # we must ignore these
+ is_markup = None
+ else:
+ tag_count += 1
+ current_tag = is_markup.group(3)
+
+ if tag_count == 0:
+ if is_markup:
+ block = callback(block.rstrip('\n')) # remove trailing newline
+ current_tag = html_tags_re
+ result += block
+ block = ''
+
+ result += block
+
+ return result
+
+ def _tag_is_closed(self, tag_name: str, text: str) -> bool:
+ # super basic check if number of open tags == number of closing tags
+ return len(re.findall('<%s(?:.*?)>' % tag_name, text)) == len(re.findall('%s>' % tag_name, text))
+
+ @mark_stage(Stage.LINK_DEFS)
+ def _strip_link_definitions(self, text: str) -> str:
# Strips link definitions from text, stores the URLs and titles in
# hash references.
less_than_tab = self.tab_width - 1
@@ -818,7 +1152,7 @@ def _strip_link_definitions(self, text):
""" % less_than_tab, re.X | re.M | re.U)
return _link_def_re.sub(self._extract_link_def_sub, text)
- def _extract_link_def_sub(self, match):
+ def _extract_link_def_sub(self, match: re.Match) -> str:
id, url, title = match.groups()
key = id.lower() # Link IDs are case-insensitive
self.urls[key] = self._encode_amps_and_angles(url)
@@ -826,65 +1160,7 @@ def _extract_link_def_sub(self, match):
self.titles[key] = title
return ""
- def _do_numbering(self, text):
- ''' We handle the special extension for generic numbering for
- tables, figures etc.
- '''
- # First pass to define all the references
- self.regex_defns = re.compile(r'''
- \[\#(\w+)\s* # the counter. Open square plus hash plus a word \1
- ([^@]*)\s* # Some optional characters, that aren't an @. \2
- @(\w+) # the id. Should this be normed? \3
- ([^\]]*)\] # The rest of the text up to the terminating ] \4
- ''', re.VERBOSE)
- self.regex_subs = re.compile(r"\[@(\w+)\s*\]") # [@ref_id]
- counters = {}
- references = {}
- replacements = []
- definition_html = '{}{}{}'
- reference_html = '{}'
- for match in self.regex_defns.finditer(text):
- # We must have four match groups otherwise this isn't a numbering reference
- if len(match.groups()) != 4:
- continue
- counter = match.group(1)
- text_before = match.group(2)
- ref_id = match.group(3)
- text_after = match.group(4)
- number = counters.get(counter, 1)
- references[ref_id] = (number, counter)
- replacements.append((match.start(0),
- definition_html.format(counter,
- ref_id,
- text_before,
- number,
- text_after),
- match.end(0)))
- counters[counter] = number + 1
- for repl in reversed(replacements):
- text = text[:repl[0]] + repl[1] + text[repl[2]:]
-
- # Second pass to replace the references with the right
- # value of the counter
- # Fwiw, it's vaguely annoying to have to turn the iterator into
- # a list and then reverse it but I can't think of a better thing to do.
- for match in reversed(list(self.regex_subs.finditer(text))):
- number, counter = references.get(match.group(1), (None, None))
- if number is not None:
- repl = reference_html.format(counter,
- match.group(1),
- number)
- else:
- repl = reference_html.format(match.group(1),
- 'countererror',
- '?' + match.group(1) + '?')
- if "smarty-pants" in self.extras:
- repl = repl.replace('"', self._escape_table['"'])
-
- text = text[:match.start()] + repl + text[match.end():]
- return text
-
- def _extract_footnote_def_sub(self, match):
+ def _extract_footnote_def_sub(self, match: re.Match) -> str:
id, text = match.groups()
text = _dedent(text, skip_first_line=not text.startswith('\n')).strip()
normed_id = re.sub(r'\W', '-', id)
@@ -893,7 +1169,7 @@ def _extract_footnote_def_sub(self, match):
self.footnotes[normed_id] = text + "\n\n"
return ""
- def _strip_footnote_definitions(self, text):
+ def _strip_footnote_definitions(self, text: str) -> str:
"""A footnote definition looks like this:
[^note-id]: Text of the note.
@@ -926,15 +1202,13 @@ def _strip_footnote_definitions(self, text):
re.X | re.M)
return footnote_def_re.sub(self._extract_footnote_def_sub, text)
- _hr_re = re.compile(r'^[ ]{0,3}([-_*][ ]{0,2}){3,}$', re.M)
+ _hr_re = re.compile(r'^[ ]{0,3}([-_*])[ ]{0,2}(\1[ ]{0,2}){2,}$', re.M)
- def _run_block_gamut(self, text):
+ @mark_stage(Stage.BLOCK_GAMUT)
+ def _run_block_gamut(self, text: str) -> str:
# These are all the transformations that form block-level
# tags like paragraphs, headers, and list items.
- if "fenced-code-blocks" in self.extras:
- text = self._do_fenced_code_blocks(text)
-
text = self._do_headers(text)
# Do Horizontal Rules:
@@ -947,13 +1221,6 @@ def _run_block_gamut(self, text):
text = self._do_lists(text)
- if "pyshell" in self.extras:
- text = self._prepare_pyshell_blocks(text)
- if "wiki-tables" in self.extras:
- text = self._do_wiki_tables(text)
- if "tables" in self.extras:
- text = self._do_tables(text)
-
text = self._do_code_blocks(text)
text = self._do_block_quotes(text)
@@ -968,141 +1235,8 @@ def _run_block_gamut(self, text):
return text
- def _pyshell_block_sub(self, match):
- lines = match.group(0).splitlines(0)
- _dedentlines(lines)
- indent = ' ' * self.tab_width
- s = ('\n' # separate from possible cuddled paragraph
- + indent + ('\n'+indent).join(lines)
- + '\n\n')
- return s
-
- def _prepare_pyshell_blocks(self, text):
- """Ensure that Python interactive shell sessions are put in
- code blocks -- even if not properly indented.
- """
- if ">>>" not in text:
- return text
-
- less_than_tab = self.tab_width - 1
- _pyshell_block_re = re.compile(r"""
- ^([ ]{0,%d})>>>[ ].*\n # first line
- ^(\1.*\S+.*\n)* # any number of subsequent lines
- ^\n # ends with a blank line
- """ % less_than_tab, re.M | re.X)
-
- return _pyshell_block_re.sub(self._pyshell_block_sub, text)
-
- def _table_sub(self, match):
- trim_space_re = '^[ \t\n]+|[ \t\n]+$'
- trim_bar_re = r'^\||\|$'
- split_bar_re = r'^\||(?' % self._html_class_str_from_tag('table'), '', '
']
- cols = [re.sub(escape_bar_re, '|', cell.strip()) for cell in re.split(split_bar_re, re.sub(trim_bar_re, "", re.sub(trim_space_re, "", head)))]
- for col_idx, col in enumerate(cols):
- hlines.append('
')
- hlines.append('')
-
- # tbody
- hlines.append('')
- for line in body.strip('\n').split('\n'):
- hlines.append('
')
- cols = [re.sub(escape_bar_re, '|', cell.strip()) for cell in re.split(split_bar_re, re.sub(trim_bar_re, "", re.sub(trim_space_re, "", line)))]
- for col_idx, col in enumerate(cols):
- hlines.append('
',
@@ -2093,6 +2145,10 @@ def _add_footnotes(self, text):
if not self.footnote_return_symbol:
self.footnote_return_symbol = "↩"
+ # self.footnotes is generated in _strip_footnote_definitions, which runs re.sub on the whole
+ # text. This means that the dict keys are inserted in order of appearance. Use the dict to
+ # sort footnote ids by that same order
+ self.footnote_ids.sort(key=lambda a: list(self.footnotes.keys()).index(a))
for i, id in enumerate(self.footnote_ids):
if i != 0:
footer.append('')
@@ -2127,7 +2183,7 @@ def _add_footnotes(self, text):
_naked_lt_re = re.compile(r'<(?![a-z/?\$!])', re.I)
_naked_gt_re = re.compile(r'''(?''', re.I)
- def _encode_amps_and_angles(self, text):
+ def _encode_amps_and_angles(self, text: str) -> str:
# Smart processing for ampersands and angle brackets that need
# to be encoded.
text = _AMPERSAND_RE.sub('&', text)
@@ -2141,23 +2197,29 @@ def _encode_amps_and_angles(self, text):
text = self._naked_gt_re.sub('>', text)
return text
- _incomplete_tags_re = re.compile("<(/?\w+[\s/]+?)")
+ _incomplete_tags_re = re.compile(r"<(!--|/?\w+?(?!\w)\s*?.+?(?:[\s/]+?|$))")
- def _encode_incomplete_tags(self, text):
+ def _encode_incomplete_tags(self, text: str) -> str:
if self.safe_mode not in ("replace", "escape"):
return text
- return self._incomplete_tags_re.sub("<\\1", text)
+ if self._is_auto_link(text):
+ return text # this is not an incomplete tag, this is a link in the form
+
+ def incomplete_tags_sub(match):
+ return match.group().replace('<', '<')
- def _encode_backslash_escapes(self, text):
+ return self._incomplete_tags_re.sub(incomplete_tags_sub, text)
+
+ def _encode_backslash_escapes(self, text: str) -> str:
for ch, escape in list(self._escape_table.items()):
text = text.replace("\\"+ch, escape)
return text
_auto_link_re = re.compile(r'<((https?|ftp):[^\'">\s]+)>', re.I)
- def _auto_link_sub(self, match):
+ def _auto_link_sub(self, match: re.Match) -> str:
g1 = match.group(1)
- return '%s' % (g1, g1)
+ return '{}'.format(self._protect_url(https://github.com/g1), g1)
_auto_email_link_re = re.compile(r"""
<
@@ -2169,16 +2231,16 @@ def _auto_link_sub(self, match):
)
>
""", re.I | re.X | re.U)
- def _auto_email_link_sub(self, match):
+ def _auto_email_link_sub(self, match: re.Match) -> str:
return self._encode_email_address(
self._unescape_special_chars(match.group(1)))
- def _do_auto_links(self, text):
+ def _do_auto_links(self, text: str) -> str:
text = self._auto_link_re.sub(self._auto_link_sub, text)
text = self._auto_email_link_re.sub(self._auto_email_link_sub, text)
return text
- def _encode_email_address(self, addr):
+ def _encode_email_address(self, addr: str) -> str:
# Input: an email address, e.g. "foo@example.com"
#
# Output: the email address as a mailto link, with each character
@@ -2198,60 +2260,124 @@ def _encode_email_address(self, addr):
% (''.join(chars), ''.join(chars[7:]))
return addr
- def _do_link_patterns(self, text):
- link_from_hash = {}
- for regex, repl in self.link_patterns:
- replacements = []
- for match in regex.finditer(text):
- if hasattr(repl, "__call__"):
- href = repl(match)
- else:
- href = match.expand(repl)
- replacements.append((match.span(), href))
- for (start, end), href in reversed(replacements):
+ def _unescape_special_chars(self, text: str) -> str:
+ # Swap back in all the special characters we've hidden.
+ hashmap = tuple(self._escape_table.items()) + tuple(self._code_table.items())
+ # html_blocks table is in format {hash: item} compared to usual {item: hash}
+ hashmap += tuple(tuple(reversed(i)) for i in self.html_blocks.items())
+ while True:
+ orig_text = text
+ for ch, hash in hashmap:
+ text = text.replace(hash, ch)
+ if text == orig_text:
+ break
+ return text
- # Do not match against links inside brackets.
- if text[start - 1:start] == '[' and text[end:end + 1] == ']':
- continue
+ def _outdent(self, text: str) -> str:
+ # Remove one level of line-leading tabs or spaces
+ return self._outdent_re.sub('', text)
- # Do not match against links in the standard markdown syntax.
- if text[start - 2:start] == '](' or text[end:end + 2] == '")':
- continue
+ def _hash_span(self, text: str) -> str:
+ '''
+ Wrapper around `_hash_text` that also adds the hash to `self.html_spans`,
+ meaning it will be automatically unhashed during conversion.
+ '''
+ key = _hash_text(text)
+ self.html_spans[key] = text
+ return key
+
+ @staticmethod
+ def _uniform_outdent(
+ text: str,
+ min_outdent: Optional[str] = None,
+ max_outdent: Optional[str] = None
+ ) -> tuple[str, str]:
+ '''
+ Removes the smallest common leading indentation from each (non empty)
+ line of `text` and returns said indent along with the outdented text.
- # Do not match against links which are escaped.
- if text[start - 3:start] == '"""' and text[end:end + 3] == '"""':
- text = text[:start - 3] + text[start:end] + text[end + 3:]
- continue
+ Args:
+ min_outdent: make sure the smallest common whitespace is at least this size
+ max_outdent: the maximum amount a line can be outdented by
+ '''
- escaped_href = (
- href.replace('"', '"') # b/c of attr quote
- # To avoid markdown and :
- .replace('*', self._escape_table['*'])
- .replace('_', self._escape_table['_']))
- link = '%s' % (escaped_href, text[start:end])
- hash = _hash_text(link)
- link_from_hash[hash] = link
- text = text[:start] + hash + text[end:]
- for hash, link in list(link_from_hash.items()):
- text = text.replace(hash, link)
- return text
+ # find the leading whitespace for every line
+ whitespace: list[Union[str, None]] = [
+ re.findall(r'^[ \t]*', line)[0] if line else None
+ for line in text.splitlines()
+ ]
+ whitespace_not_empty = [i for i in whitespace if i is not None]
+
+ # if no whitespace detected (ie: no lines in code block, issue #505)
+ if not whitespace_not_empty:
+ return '', text
+
+ # get minimum common whitespace
+ outdent = min(whitespace_not_empty)
+ # adjust min common ws to be within bounds
+ if min_outdent is not None:
+ outdent = min([i for i in whitespace_not_empty if i >= min_outdent] or [min_outdent])
+ if max_outdent is not None:
+ outdent = min(outdent, max_outdent)
+
+ outdented = []
+ for line_ws, line in zip(whitespace, text.splitlines(True)):
+ if line.startswith(outdent):
+ # if line starts with smallest common ws, dedent it
+ outdented.append(line.replace(outdent, '', 1))
+ elif line_ws is not None and line_ws < outdent:
+ # if less indented than min common whitespace then outdent as much as possible
+ outdented.append(line.replace(line_ws, '', 1))
+ else:
+ outdented.append(line)
- def _unescape_special_chars(self, text):
- # Swap back in all the special characters we've hidden.
- for ch, hash in list(self._escape_table.items()):
- text = text.replace(hash, ch)
- return text
+ return outdent, ''.join(outdented)
- def _outdent(self, text):
- # Remove one level of line-leading tabs or spaces
- return self._outdent_re.sub('', text)
+ @staticmethod
+ def _uniform_indent(
+ text: str,
+ indent: str,
+ include_empty_lines: bool = False,
+ indent_empty_lines: bool = False
+ ) -> str:
+ '''
+ Uniformly indent a block of text by a fixed amount
+
+ Args:
+ text: the text to indent
+ indent: a string containing the indent to apply
+ include_empty_lines: don't remove whitespace only lines
+ indent_empty_lines: indent whitespace only lines with the rest of the text
+ '''
+ blocks = []
+ for line in text.splitlines(True):
+ if line.strip() or indent_empty_lines:
+ blocks.append(indent + line)
+ elif include_empty_lines:
+ blocks.append(line)
+ else:
+ blocks.append('')
+ return ''.join(blocks)
+
+ @staticmethod
+ def _match_overlaps_substr(text, match: re.Match, substr: str) -> bool:
+ '''
+ Checks if a regex match overlaps with a substring in the given text.
+ '''
+ for instance in re.finditer(re.escape(substr), text):
+ start, end = instance.span()
+ if start <= match.start() <= end:
+ return True
+ if start <= match.end() <= end:
+ return True
+ return False
class MarkdownWithExtras(Markdown):
"""A markdowner class that enables most extras:
- footnotes
- - code-color (only has effect if 'pygments' Python module on path)
+ - fenced-code-blocks (only highlights code if 'pygments' Python module on path)
These are not included:
- pyshell (specific to Python-related documenting)
@@ -2259,74 +2385,1523 @@ class MarkdownWithExtras(Markdown):
- link-patterns (because you need to specify some actual
link-patterns anyway)
"""
- extras = ["footnotes", "code-color"]
+ extras = ["footnotes", "fenced-code-blocks"] # type: ignore
-# ---- internal support functions
+# ----------------------------------------------------------
+# Extras
+# ----------------------------------------------------------
+# Base classes
+# ----------------------------------------------------------
-def calculate_toc_html(toc):
- """Return the HTML for the current TOC.
+class Extra(ABC):
+ _registry: dict[str, type['Extra']] = {}
+ _exec_order: dict[Stage, tuple[list[type['Extra']], list[type['Extra']]]] = {}
- This expects the `_toc` attribute to have been set on this instance.
- """
- if toc is None:
- return None
+ name: str
+ '''
+ An identifiable name that users can use to invoke the extra
+ in the Markdown class
+ '''
+ order: tuple[Collection[Union[Stage, type['Extra']]], Collection[Union[Stage, type['Extra']]]]
+ '''
+ Tuple of two iterables containing the stages/extras this extra will run before and
+ after, respectively
+ '''
- def indent():
- return ' ' * (len(h_stack) - 1)
- lines = []
- h_stack = [0] # stack of header-level numbers
- for level, id, name in toc:
- if level > h_stack[-1]:
- lines.append("%s
%s' % (
- indent(), id, name))
- while len(h_stack) > 1:
- h_stack.pop()
- if not lines[-1].endswith("
"):
- lines[-1] += ""
- lines.append("%s" % indent())
- return '\n'.join(lines) + '\n'
+    def __init__(self, md: Markdown, options: Optional[dict]):
+        '''
+        Store the owning converter and this extra's settings.
+
+        Args:
+            md: An instance of `Markdown`
+            options: a dict of settings to alter the extra's behaviour. `None` is
+                replaced with a new empty dict; any other value is stored as given.
+        '''
+        if options is None:
+            options = {}
+        self.options = options
+        self.md = md
+ @classmethod
+ def deregister(cls):
+ '''
+ Removes the class from the extras registry and unsets its execution order.
+
+ The `_registry` entry stored under `cls.name` is deleted if present, and every
+ occurrence of the class is removed from both lists of every `Extra._exec_order`
+ entry.
+ '''
+ if cls.name in cls._registry:
+ del cls._registry[cls.name]
-class UnicodeWithAttrs(unicode):
- """A subclass of unicode used for the return value of conversion to
- possibly attach some attributes. E.g. the "toc_html" attribute when
- the "toc" extra is used.
- """
- metadata = None
- toc_html = None
+ for exec_order in Extra._exec_order.values():
+ # find everywhere this extra is mentioned and remove it
+ for section in exec_order:
+ # `while` rather than `if`: a section may list the class more than once
+ while cls in section:
+ section.remove(cls)
-## {{{ http://code.activestate.com/recipes/577257/ (r1)
-_slugify_strip_re = re.compile(r'[^\w\s-]')
-_slugify_hyphenate_re = re.compile(r'[-\s]+')
-def _slugify(value):
- """
- Normalizes string, converts to lowercase, removes non-alpha characters,
- and converts spaces to hyphens.
+ @classmethod
+ def register(cls):
+ '''
+ Registers the class for use with `Markdown` and calculates its execution order based on
+ the `order` class attribute.
+
+ Items from `order[0]` are things this extra runs before, items from `order[1]` are
+ things it runs after. Each item is handled in one of two ways:
+
+ - another `Extra` subclass: this class is inserted next to it in every
+ `_exec_order` list that already contains it (nothing is inserted if that
+ extra appears in no list)
+ - a `Stage`: this class is added to that stage's "before" or "after" list,
+ unless it is already present there
+ '''
+ cls._registry[cls.name] = cls
+
+ for index, item in enumerate((*cls.order[0], *cls.order[1])):
+ # the "before" items come first in the chained tuple, so the index tells them apart
+ before = index < len(cls.order[0])
+ if not isinstance(item, Stage) and issubclass(item, Extra):
+ # eg: FencedCodeBlocks
+ for exec_orders in Extra._exec_order.values():
+ # insert this extra everywhere the other one is mentioned
+ for section in exec_orders:
+ if item in section:
+ to_index = section.index(item)
+ if not before:
+ # running after `item` means taking the slot just behind it
+ to_index += 1
+ section.insert(to_index, cls)
+ else:
+ # eg: Stage.PREPROCESS
+ Extra._exec_order.setdefault(item, ([], []))
+ if cls in Extra._exec_order[item][0 if before else 1]:
+ # extra is already running on this side of the stage. Don't duplicate that effort
+ continue
+ if before:
+ # most recently registered extra goes to the front of the "before" list
+ Extra._exec_order[item][0].insert(0, cls)
+ else:
+ Extra._exec_order[item][1].append(cls)
+
+ @abstractmethod
+ def run(self, text: str) -> str:
+ '''
+ Run the extra against the given text. Must be implemented by every subclass.
+
+ Args:
+ text: the text to process
+
+ Returns:
+ The new text after being modified by the extra
+ '''
+ ...
+
+ def test(self, text: str) -> bool:
+ '''
+ Check a section of markdown to see if this extra should be run upon it.
+ The default implementation will always return True but it's recommended to override
+ this behaviour to improve performance.
+
+ Args:
+ text: the section of markdown to inspect
+
+ Returns:
+ True if the extra should be run on `text`
+ '''
+ return True
+
+
+class ItalicAndBoldProcessor(Extra):
+    '''
+    An ABC that provides hooks for dealing with italics and bold syntax.
+    This class is set to trigger both before AND after the italics and bold stage.
+    This allows any child classes to intercept instances of bold or italic syntax and
+    change the output or hash it to prevent it from being processed.
+
+    After the I&B stage any hashes in the `hash_table` instance variable are replaced.
+    '''
+    name = 'italic-and-bold-processor'
+    order = (Stage.ITALIC_AND_BOLD,), (Stage.ITALIC_AND_BOLD,)
+
+    strong_re = Markdown._strong_re
+    em_re = Markdown._em_re
+
+    def __init__(self, md: Markdown, options: Optional[dict]):
+        '''
+        Args:
+            md: An instance of `Markdown`
+            options: a dict of settings to alter the extra's behaviour
+        '''
+        super().__init__(md, options)
+        # maps hash key -> original text, filled by `sub_hash` and read back in `run`
+        self.hash_table = {}
+
+    def run(self, text):
+        '''
+        Before the I&B stage: pass every strong/em match through `sub`.
+        Otherwise: swap the keys stored in `hash_table` back for their original text.
+        '''
+        if self.md.order < Stage.ITALIC_AND_BOLD:
+            text = self.strong_re.sub(self.sub, text)
+            text = self.em_re.sub(self.sub, text)
+        else:
+            # push any hashed values back, using a while loop to deal with recursive hashes
+            orig_text = ''
+            while orig_text != text:
+                orig_text = text
+                for key, substr in self.hash_table.items():
+                    text = text.replace(key, substr)
+        return text
+
+    @abstractmethod
+    def sub(self, match: re.Match) -> str:
+        '''
+        Replacement callback for strong/em matches. Subclasses must override it; this
+        base body returns the matched text unchanged and can be reached via `super()`.
+        '''
+        # do nothing. Let `Markdown._do_italics_and_bold` do its thing later
+        return match.string[match.start(): match.end()]
+
+    def sub_hash(self, match: re.Match) -> str:
+        '''
+        Replacement callback that hides the matched text behind a hash key.
+
+        Returns:
+            The key, which `run` replaces with the original text in its unhashing branch
+        '''
+        substr = match.string[match.start(): match.end()]
+        key = _hash_text(substr)
+        self.hash_table[key] = substr
+        return key
+
+    def test(self, text):
+        '''
+        Returns:
+            Before the I&B stage: whether `text` contains `*` or `_`.
+            Otherwise: whether anything was hashed and `text` contains a hash-like key.
+        '''
+        if self.md.order < Stage.ITALIC_AND_BOLD:
+            return '*' in text or '_' in text
+        # Return a real bool as `Extra.test` promises, rather than leaking the dict
+        # or the `re.Match` object to the caller. Truthiness is unchanged.
+        return bool(self.hash_table) and re.search(r'md5-[0-9a-z]{32}', text) is not None
+
+
+class _LinkProcessorExtraOpts(TypedDict, total=False):
+ '''Options for the `LinkProcessor` extra. Declared with `total=False`, so every key may be omitted.'''
+ tags: List[str]
+ '''List of tags to be processed by the extra. Default is `['a', 'img']`'''
+ inline: bool
+ '''Whether to process inline links. Default: True'''
+ ref: bool
+ '''Whether to process reference links. Default: True'''
+
+
+class LinkProcessor(Extra):
+ name = 'link-processor'
+ order = (Stage.ITALIC_AND_BOLD,), (Stage.ESCAPE_SPECIAL,)
+ options: _LinkProcessorExtraOpts
+
+    def __init__(self, md: Markdown, options: Optional[dict]):
+        '''
+        Args:
+            md: An instance of `Markdown`
+            options: a dict of settings to alter the extra's behaviour. Any falsy
+                value (including `None`) is replaced with a new empty dict.
+        '''
+        super().__init__(md, options if options else {})
+
+    def parse_inline_anchor_or_image(self, text: str, _link_text: str, start_idx: int) -> Optional[Tuple[str, str, Optional[str], int]]:
+        '''
+        Extract the URL and optional title of an inline link from `text`. The link can be
+        an inline anchor or an inline image.
+
+        Args:
+            text: the whole text containing the link
+            _link_text: the human readable text inside the link (not read by this method)
+            start_idx: the index of the link within `text`
+
+        Returns:
+            None if a link was not able to be parsed from `text`.
+            If successful, a tuple is returned containing:
+
+            1. the `text` param (returned as received)
+            2. the URL
+            3. the title (can be None if not present)
+            4. the index where the link ends within text
+        '''
+        url_start = self.md._find_non_whitespace(text, start_idx + 1)
+        if url_start == len(text):
+            # the helper returned the end of the text: nothing left to parse
+            return None
+
+        bracketed = text[url_start] == "<"
+        if bracketed:
+            scan_from = self.md._find_balanced(text, url_start + 1, "<", ">")
+        else:
+            scan_from = url_start
+        link_end = self.md._find_balanced(text, scan_from, "(", ")")
+
+        title_match = self.md._inline_link_title.search(text, url_start, link_end)
+        if not title_match:
+            return None
+
+        # everything between the URL start and the title match is the URL
+        url = text[url_start:title_match.start()]
+        if bracketed:
+            url = self.md._strip_anglebrackets.sub(r'\1', url)
+        return text, url, title_match.group("title"), link_end
+
+    def process_link_shortrefs(self, text: str, link_text: str, start_idx: int) -> Tuple[Optional[re.Match], str]:
+        '''
+        Detects shortref links within a string and converts them to normal references
+
+        Args:
+            text: the whole text containing the link
+            link_text: the human readable text inside the link
+            start_idx: the index of the link within `text`
+
+        Returns:
+            A tuple containing:
+
+            1. A potential `re.Match` against the link reference within `text` (will be None if not found)
+            2. potentially modified version of the `text` param
+        '''
+        # only links that are NOT followed by an id section (`[...]`) qualify
+        lacks_id_section = link_text and re.match(r'[ ]?(?:\n[ ]*)?(?!\[)', text[start_idx:])
+        if not lacks_id_section:
+            return None, text
+
+        # try a match with `[]` inserted into the text
+        with_empty_id = f'{text[:start_idx]}[]{text[start_idx:]}'
+        ref_match = self.md._tail_of_reference_link_re.match(with_empty_id, start_idx)
+        if not ref_match:
+            return None, text
+
+        # The modified text is ONLY handed back when the link id is a known reference.
+        # This makes sure that we don't get stuck in any loops and also that when a user
+        # inputs `[abc]` we don't output `[abc][]` in the final HTML
+        link_id = ref_match.group("id").lower() or link_text.lower()
+        if link_id in self.md.urls:
+            return ref_match, with_empty_id
+        return None, text
+
+    def parse_ref_anchor_or_ref_image(self, text: str, link_text: str, start_idx: int) -> Optional[Tuple[str, Optional[str], Optional[str], int]]:
+        '''
+        Parse a string and extract a link from it. This can be a reference anchor or image.
+
+        Args:
+            text: the whole text containing the link
+            link_text: the human readable text inside the link
+            start_idx: the index of the link within `text`
+
+        Returns:
+            None if a link was not able to be parsed from `text`.
+            If successful, a tuple is returned containing:
+
+            1. potentially modified version of the `text` param
+            2. the URL (can be None if the reference doesn't exist)
+            3. the title (can be None if not present)
+            4. the index where the link ends within text
+        '''
+        ref_match = None
+        if 'link-shortrefs' in self.md.extras:
+            ref_match, text = self.process_link_shortrefs(text, link_text, start_idx)
+
+        if not ref_match:
+            ref_match = self.md._tail_of_reference_link_re.match(text, start_idx)
+            if not ref_match:
+                # text isn't markup
+                return None
+
+        # an empty id falls back to the link text, for links like [this][]
+        link_id = ref_match.group("id").lower() or link_text.lower()
+
+        return (
+            text,
+            self.md.urls.get(link_id),
+            self.md.titles.get(link_id),
+            ref_match.end(),
+        )
+
+ def process_image(self, url: str, title_attr: str, link_text: str) -> Tuple[str, int]:
+ '''
+ Takes a URL, title and link text and returns an HTML `` tag
+
+ Args:
+ url: the image URL/src
+ title_attr: a string containing the title attribute of the tag (eg: `' title="..."'`)
+ link_text: the human readable text portion of the link
+
+ Returns:
+ A tuple containing:
+
+ 1. The HTML string
+ 2. The length of the opening HTML tag in the string. For `` it's the whole string.
+ This section will be skipped by the link processor
+ '''
+ img_class_str = self.md._html_class_str_from_tag("img")
+ result = (
+ f' Tuple[str, int]:
+ '''
+ Takes a URL, title and link text and returns an HTML `` tag
+
+ Args:
+ url: the URL
+ title_attr: a string containing the title attribute of the tag (eg: `' title="..."'`)
+ link_text: the human readable text portion of the link
+
+ Returns:
+ A tuple containing:
+
+ 1. The HTML string
+ 2. The length of the opening HTML tag in the string. This section will be skipped
+ by the link processor
+ '''
+ if self.md.safe_mode and not self.md._safe_href.match(url):
+ result_head = f''
+ else:
+ result_head = f''
+
+ return f'{result_head}{link_text}', len(result_head)
+
+ def run(self, text: str):
+ '''
+ Scan `text` for `[`-introduced constructs and replace every recognised link,
+ image or footnote reference with generated markup.
+
+ Inline links are parsed with `parse_inline_anchor_or_image`, reference links with
+ `parse_ref_anchor_or_ref_image`. The result is built by `process_image` or
+ `process_anchor`. The `inline`, `ref` and `tags` options can switch individual
+ kinds of links off.
+
+ Returns:
+ The text with all recognised links replaced
+ '''
+ MAX_LINK_TEXT_SENTINEL = 3000 # markdown2 issue 24
+
+ # `anchor_allowed_pos` is used to support img links inside
+ # anchors, but not anchors inside anchors. An anchor's start
+ # pos must be `>= anchor_allowed_pos`.
+ anchor_allowed_pos = 0
+
+ curr_pos = 0
+
+ while True:
+ # The next '[' is the start of:
+ # - an inline anchor: [text](url "title")
+ # - a reference anchor: [text][id]
+ # - an inline img: ![text](url "title")
+ # - a reference img: ![text][id]
+ # - a footnote ref: [^id]
+ # (Only if 'footnotes' extra enabled)
+ # - a footnote defn: [^id]: ...
+ # (Only if 'footnotes' extra enabled) These have already
+ # been stripped in _strip_footnote_definitions() so no
+ # need to watch for them.
+ # - a link definition: [id]: url "title"
+ # These have already been stripped in
+ # _strip_link_definitions() so no need to watch for them.
+ # - not markup: [...anything else...
+ try:
+ start_idx = text.index('[', curr_pos)
+ except ValueError:
+ break
+ text_length = len(text)
+
+ # Find the matching closing ']'.
+ # Markdown.pl allows *matching* brackets in link text so we
+ # will here too. Markdown.pl *doesn't* currently allow
+ # matching brackets in img alt text -- we'll differ in that
+ # regard.
+ bracket_depth = 0
+
+ for p in range(
+ start_idx + 1,
+ min(start_idx + MAX_LINK_TEXT_SENTINEL, text_length)
+ ):
+ ch = text[p]
+ if ch == ']':
+ bracket_depth -= 1
+ if bracket_depth < 0:
+ break
+ elif ch == '[':
+ bracket_depth += 1
+ else:
+ # Closing bracket not found within sentinel length.
+ # This isn't markup.
+ curr_pos = start_idx + 1
+ continue
+ link_text = text[start_idx + 1: p]
+
+ # Fix for issue 341 - Injecting XSS into link text
+ if self.md.safe_mode:
+ link_text = self.md._hash_html_spans(link_text)
+ link_text = self.md._unhash_html_spans(link_text)
+
+ # Possibly a footnote ref?
+ if "footnotes" in self.md.extras and link_text.startswith("^"):
+ normed_id = re.sub(r'\W', '-', link_text[1:])
+ if normed_id in self.md.footnotes:
+ result = (
+ # NOTE(review): the first fragment below is an empty f-string in this copy.
+ # Presumably the opening footnote markup was lost - confirm against the
+ # upstream file before relying on this branch.
+ f''
+ # insert special footnote marker that's easy to find and match against later
+ f'{self.md._footnote_marker}-{normed_id}'
+ )
+ text = text[:start_idx] + result + text[p+1:]
+ else:
+ # This id isn't defined, leave the markup alone.
+ curr_pos = p + 1
+ continue
+
+ # Now determine what this is by the remainder.
+ p += 1
+
+ # -- Extract the URL, title and end index from the link
+
+ # inline anchor or inline img
+ if text[p:p + 1] == '(':
+ if not self.options.get('inline', True):
+ curr_pos = start_idx + 1
+ continue
+
+ parsed = self.parse_inline_anchor_or_image(text, link_text, p)
+ if not parsed:
+ # text isn't markup
+ curr_pos = start_idx + 1
+ continue
+
+ text, url, title, url_end_idx = parsed
+ url = self.md._unhash_html_spans(url, code=True)
+ # reference anchor or reference img
+ else:
+ if not self.options.get('ref', True):
+ curr_pos = start_idx + 1
+ continue
+
+ parsed = self.parse_ref_anchor_or_ref_image(text, link_text, p)
+ if not parsed:
+ curr_pos = start_idx + 1
+ continue
+
+ text, url, title, url_end_idx = parsed
+ if url is None:
+ # This id isn't defined, leave the markup alone.
+ # set current pos to end of link title and continue from there
+ curr_pos = p
+ continue
+
+ # -- Encode and hash the URL and title to avoid conflicts with italics/bold
+
+ url = (
+ url
+ .replace('*', self.md._escape_table['*'])
+ .replace('_', self.md._escape_table['_'])
+ )
+ if title:
+ title = (
+ _xml_escape_attr(title)
+ .replace('*', self.md._escape_table['*'])
+ .replace('_', self.md._escape_table['_'])
+ )
+ title_str = f' title="{title}"'
+ else:
+ title_str = ''
+
+ # -- Process the anchor/image
+
+ # an image is a link whose opening bracket is directly preceded by `!`
+ is_img = start_idx > 0 and text[start_idx-1] == "!"
+ if is_img:
+ if 'img' not in self.options.get('tags', ['img']):
+ curr_pos = start_idx + 1
+ continue
+
+ # include the leading `!` in the span that gets replaced
+ start_idx -= 1
+ result, skip = self.process_image(url, title_str, link_text)
+ elif start_idx >= anchor_allowed_pos:
+ if 'a' not in self.options.get('tags', ['a']):
+ curr_pos = start_idx + 1
+ continue
+
+ result, skip = self.process_anchor(url, title_str, link_text)
+ else:
+ # anchor not allowed here/invalid markup
+ curr_pos = start_idx + 1
+ continue
+
+ if "smarty-pants" in self.md.extras:
+ result = result.replace('"', self.md._escape_table['"'])
+
+ # <img> allowed from curr_pos onwards, <a> allowed from anchor_allowed_pos onwards.
+ # this means images can exist within `<a>` tags but anchors can only come after the
+ # current anchor has been closed
+ curr_pos = start_idx + skip
+ anchor_allowed_pos = start_idx + len(result)
+ text = text[:start_idx] + result + text[url_end_idx:]
+
+ return text
+
+    def test(self, text):
+        '''Cheap pre-check: the extra only needs to run if `text` contains `(` or `[`.'''
+        return any(marker in text for marker in ('(', '['))
+
+
+# User facing extras
+# ----------------------------------------------------------
+
+
+class Admonitions(Extra):
+    '''
+    Enable parsing of RST admonitions
+    '''
+
+    name = 'admonitions'
+    order = (Stage.BLOCK_GAMUT, Stage.LINK_DEFS), ()
+
+    admonitions = r'admonition|attention|caution|danger|error|hint|important|note|tip|warning'
+
+    admonitions_re = re.compile(r'''
+        ^(\ *)\.\.\ (%s)::\ *                # $1 leading indent, $2 the admonition
+        (.*)?                                # $3 admonition title
+        ((?:\s*\n\1\ {3,}.*)+?)              # $4 admonition body (required)
+        (?=\s*(?:\Z|\n{4,}|\n\1?\ {0,2}\S))  # until EOF, 3 blank lines or something less indented
+        ''' % admonitions,
+        re.IGNORECASE | re.MULTILINE | re.VERBOSE
+    )
+
+    def test(self, text):
+        '''Return True if `text` contains at least one admonition block.'''
+        return self.admonitions_re.search(text) is not None
+
+    def sub(self, match: re.Match) -> str:
+        '''
+        Build the replacement for a single admonition matched by `admonitions_re`.
+
+        Args:
+            match: a match whose groups are the leading indent, the admonition
+                name, the title and the body
+
+        Returns:
+            The admonition wrapped in an aside block and indented back to where the
+            original admonition started
+        '''
+        lead_indent, admonition_name, title, body = match.groups()
+
+        # NOTE(review): '%s' % x is a no-op format here and for `title` below -
+        # presumably wrapping markup was lost from these literals; confirm upstream.
+        admonition_type = '%s' % admonition_name
+
+        # figure out the class names to assign the block
+        if admonition_name.lower() == 'admonition':
+            admonition_class = 'admonition'
+        else:
+            admonition_class = 'admonition %s' % admonition_name.lower()
+
+        # titles are generally optional
+        if title:
+            title = '%s' % title
+
+        # process the admonition body like regular markdown
+        body = self.md._run_block_gamut("\n%s\n" % self.md._uniform_outdent(body)[1])
+
+        # indent the body before placing inside the aside block
+        admonition = self.md._uniform_indent(
+            '{}\n{}\n\n{}\n'.format(admonition_type, title, body),
+            self.md.tab, False
+        )
+        # wrap it in an aside. The template must actually contain both placeholders:
+        # formatting an empty string silently drops the class names and the content.
+        admonition = '<aside class="{}">\n{}</aside>'.format(admonition_class, admonition)
+        # now indent the whole admonition back to where it started
+        return self.md._uniform_indent(admonition, lead_indent, False)
+
+    def run(self, text):
+        '''Replace every admonition found in `text` with the output of `sub`.'''
+        return self.admonitions_re.sub(self.sub, text)
+
+
+class Alerts(Extra):
+ '''
+ Markdown Alerts as per
+ https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax#alerts
+ '''
+
+ name = 'alerts'
+ order = (), (Stage.BLOCK_QUOTES, )
+
+ alert_re = re.compile(r'''
+
')
+ return super().tags(lexer_name)
+
+
+class MiddleWordEm(ItalicAndBoldProcessor):
+ '''
+ Allows or disallows emphasis syntax in the middle of words,
+ defaulting to allow. Disabling this means that `this_text_here` will not be
+ converted to `thistexthere`.
+ '''
+ name = 'middle-word-em'
+ order = (CodeFriendly,), (Stage.ITALIC_AND_BOLD,)
+
+ def __init__(self, md: Markdown, options: Union[dict, bool, None]):
+ '''
+ Args:
+ md: the markdown instance
+ options: can be bool for backwards compatibility but will be converted to a dict
+ in the constructor. All options are:
+ - allowed (bool): whether to allow emphasis in the middle of a word.
+ If `options` is a bool it will be placed under this key.
+ '''
+ if isinstance(options, bool):
+ options = {'allowed': options}
+ else:
+ options = options or {}
+ options.setdefault('allowed', True)
+ super().__init__(md, options)
+
+ self.middle_word_em_re = re.compile(
+ r'''
+ (? self.md.stage:
+ text = text.replace(self.hash_table['_'], '_')
+ text = text.replace(self.hash_table['*'], '*')
+
+ return text
+
+ def sub(self, match: re.Match):
+ if match.re != self.middle_word_em_re:
+ return super().sub(match)
+
+ syntax = match.group(1)
+ return self.hash_table[syntax]
+
+
+class Numbering(Extra):
+ '''
+ Support of generic counters. Non standard extension to
+ allow sequential numbering of figures, tables, equations, exhibits etc.
+
+ Definitions have the form `[#counter text @id text]`, references the form `[@id]`.
+ '''
+
+ name = 'numbering'
+ order = (Stage.LINK_DEFS,), ()
+
+ def run(self, text):
+ # Two passes: number every definition first, then resolve every `[@id]` reference.
+ # First pass to define all the references
+ regex_defns = re.compile(r'''
+ \[\#(\w+) # the counter. Open square plus hash plus a word \1
+ ([^@]*) # Some optional characters, that aren't an @. \2
+ @(\w+) # the id. Should this be normed? \3
+ ([^\]]*)\] # The rest of the text up to the terminating ] \4
+ ''', re.VERBOSE)
+ regex_subs = re.compile(r"\[@(\w+)\s*\]") # [@ref_id]
+ counters = {}
+ references = {}
+ replacements = []
+ # NOTE(review): this template has three placeholders but is formatted with five
+ # arguments below, so `number` and `text_after` never reach the output.
+ # Presumably markup was lost from the literal - confirm against the upstream file.
+ definition_html = '{}{}{}'
+ # NOTE(review): one placeholder, three arguments at both call sites - same concern.
+ reference_html = '{}'
+ for match in regex_defns.finditer(text):
+ # We must have four match groups otherwise this isn't a numbering reference
+ # (`regex_defns` defines exactly four groups, so this guard never triggers)
+ if len(match.groups()) != 4:
+ continue
+ counter = match.group(1)
+ text_before = match.group(2).strip()
+ ref_id = match.group(3)
+ text_after = match.group(4)
+ # each counter name starts at 1 and is numbered independently
+ number = counters.get(counter, 1)
+ references[ref_id] = (number, counter)
+ replacements.append((match.start(0),
+ definition_html.format(counter,
+ ref_id,
+ text_before,
+ number,
+ text_after),
+ match.end(0)))
+ counters[counter] = number + 1
+ # apply back to front so the recorded offsets stay valid while the text changes
+ for repl in reversed(replacements):
+ text = text[:repl[0]] + repl[1] + text[repl[2]:]
+
+ # Second pass to replace the references with the right
+ # value of the counter
+ # Fwiw, it's vaguely annoying to have to turn the iterator into
+ # a list and then reverse it but I can't think of a better thing to do.
+ for match in reversed(list(regex_subs.finditer(text))):
+ number, counter = references.get(match.group(1), (None, None))
+ if number is not None:
+ repl = reference_html.format(counter,
+ match.group(1),
+ number)
+ else:
+ # unknown id: format the reference with an error marker instead
+ repl = reference_html.format(match.group(1),
+ 'countererror',
+ '?' + match.group(1) + '?')
+ if "smarty-pants" in self.md.extras:
+ repl = repl.replace('"', self.md._escape_table['"'])
+
+ text = text[:match.start()] + repl + text[match.end():]
+ return text
+
+
+class PyShell(Extra):
+    '''
+    Turns unindented Python interactive shell sessions (lines starting with `>>>`)
+    into code blocks.
+    '''
+
+    name = 'pyshell'
+    order = (), (Stage.LISTS,)
+
+    def test(self, text):
+        '''Only texts containing a `>>>` prompt can hold a shell session.'''
+        return ">>>" in text
+
+    def sub(self, match: re.Match) -> str:
+        '''
+        Convert one matched shell session.
+
+        With the `fenced-code-blocks` extra enabled the session is wrapped in a
+        `pycon` fence and handed to that extra. Otherwise every line is indented by
+        one tab width.
+        '''
+        session = match.group(0)
+
+        if "fenced-code-blocks" in self.md.extras:
+            fenced = "```pycon\n" + _dedent(session) + "```\n"
+            return self.md.extra_classes['fenced-code-blocks'].run(fenced)
+
+        session_lines = session.splitlines(0)
+        _dedentlines(session_lines)
+        pad = ' ' * self.md.tab_width
+        line_break = '\n' + pad
+        # the leading newline separates the block from a possible cuddled paragraph
+        return '\n' + pad + line_break.join(session_lines) + '\n'
+
+    def run(self, text):
+        '''Replace every shell session found in `text` with the output of `sub`.'''
+        max_lead = self.md.tab_width - 1
+        session_re = re.compile(r"""
+ ^([ ]{0,%d})>>>[ ].*\n # first line
+ ^(\1[^\S\n]*\S.*\n)* # any number of subsequent lines with at least one character
+ (?=^\1?\n|\Z) # ends with a blank line or end of document
+ """ % max_lead, re.M | re.X)
+
+        return session_re.sub(self.sub, text)
+
+
+class SmartyPants(Extra):
+ '''
+ Replaces ' and " with curly quotation marks or curly
+ apostrophes. Replaces --, ---, ..., and . . . with en dashes, em dashes,
+ and ellipses.
+ '''
+ name = 'smarty-pants'
+ order = (), (Stage.SPAN_GAMUT,)
+
+ _opening_single_quote_re = re.compile(r"(? str:
+ text = self._apostrophe_year_re.sub(r"’\1", text)
+ for c in self._contractions:
+ text = text.replace("'%s" % c, "’%s" % c)
+ text = text.replace("'%s" % c.capitalize(),
+ "’%s" % c.capitalize())
+ return text
+
+    def run(self, text):
+        """Fancifies 'single quotes', "double quotes", and apostrophes.
+        Converts --, ---, and ... into en dashes, em dashes, and ellipses.
+
+        See "test/tm-cases/smarty_pants.text" for a full discussion of the
+        support here.
+        """
+        if "'" in text:  # guard for perf
+            text = self.contractions(text)
+            for quote_re, curly in (
+                (self._opening_single_quote_re, "‘"),
+                (self._closing_single_quote_re, "’"),
+            ):
+                text = quote_re.sub(curly, text)
+
+        if '"' in text:  # guard for perf
+            for quote_re, curly in (
+                (self._opening_double_quote_re, "“"),
+                (self._closing_double_quote_re, "”"),
+            ):
+                text = quote_re.sub(curly, text)
+
+        # order matters: the longer dash sequence has to be replaced first
+        for plain, fancy in (
+            ("---", "—"),
+            ("--", "–"),
+            ("...", "…"),
+            (" . . . ", "…"),
+            (". . .", "…"),
+        ):
+            text = text.replace(plain, fancy)
+
+        # TODO: Temporary hack to fix https://github.com/trentm/python-markdown2/issues/150
+        if "footnotes" in self.md.extras and "footnote-ref" in text:
+            # Quotes in the footnote back ref get converted to "smart" quotes
+            # Change them back here to ensure they work.
+            text = text.replace('class="footnote-ref”', 'class="footnote-ref"')
+
+        return text
+
+    def test(self, text):
+        '''Return True if `text` contains any character sequence this extra rewrites.'''
+        return (
+            "'" in text
+            or '"' in text
+            or '--' in text
+            or '...' in text
+            or '. . .' in text
+        )
+
+
+class Strike(Extra):
+ '''
+ Text inside of double tilde is ~~strikethrough~~
+ '''
+ name = 'strike'
+ order = (Stage.ITALIC_AND_BOLD,), ()
+
+ # `~~`, then text that neither starts nor ends with whitespace, then `~~`.
+ # re.S lets the text span several lines.
+ _strike_re = re.compile(r"~~(?=\S)(.+?)(?<=\S)~~", re.S)
+
+ def run(self, text):
+ # NOTE(review): the replacement r"\1" only removes the tildes and wraps the text
+ # in nothing. Presumably strikethrough markup was lost from this literal -
+ # confirm against the upstream file.
+ return self._strike_re.sub(r"\1", text)
+
+ def test(self, text):
+ # cheap pre-check: no `~~`, nothing to do
+ return '~~' in text
+
+
+class Tables(Extra):
+ '''
+ Tables using the same format as GFM
+ and
+ PHP-Markdown Extra .
+ '''
+ name = 'tables'
+ order = (), (Stage.LISTS,)
+
+ def run(self, text):
+ """Replace every table found in `text` with the output of `self.sub`.
+
+ Copying PHP-Markdown and GFM table syntax. Some regex borrowed from
+ https://github.com/michelf/php-markdown/blob/lib/Michelf/Markdown.php#L2538
+ """
+ # a row may be indented by at most one space less than a full tab
+ less_than_tab = self.md.tab_width - 1
+ table_re = re.compile(r'''
+ (?:(?<=\n)|\A\n?) # leading blank line
+
+ ^[ ]{0,%d} # allowed whitespace
+ (.*[|].*)[ ]*\n # $1: header row (at least one pipe)
+
+ ^[ ]{0,%d} # allowed whitespace
+ ( # $2: underline row
+ # underline row with leading bar
+ (?: \|\ *:?-+:?\ * )+ \|? \s?[ ]*\n
+ |
+ # or, underline row without leading bar
+ (?: \ *:?-+:?\ *\| )+ (?: \ *:?-+:?\ * )? \s?[ ]*\n
+ )
+
+ ( # $3: data rows
+ (?:
+ ^[ ]{0,%d}(?!\ ) # ensure line begins with 0 to less_than_tab spaces
+ .*\|.*[ ]*\n
+ )*
+ )
+ ''' % (less_than_tab, less_than_tab, less_than_tab), re.M | re.X)
+ return table_re.sub(self.sub, text)
+
+ def sub(self, match: re.Match) -> str:
+ trim_space_re = r'^\s+|\s+$'
+ trim_bar_re = r'^\||\|$'
+ split_bar_re = r'^\||(?' % self.md._html_class_str_from_tag('table'), '' % self.md._html_class_str_from_tag('thead'), '
']
+ cols = [re.sub(escape_bar_re, '|', cell.strip()) for cell in re.split(split_bar_re, re.sub(trim_bar_re, "", re.sub(trim_space_re, "", head)))]
+ for col_idx, col in enumerate(cols):
+ hlines.append('
')
+ hlines.append('')
+
+ # tbody
+ body = body.strip('\n')
+ if body:
+ hlines.append('')
+ for line in body.split('\n'):
+ hlines.append('
')
+ cols = [re.sub(escape_bar_re, '|', cell.strip()) for cell in re.split(split_bar_re, re.sub(trim_bar_re, "", re.sub(trim_space_re, "", line)))]
+ for col_idx, col in enumerate(cols):
+ hlines.append('
')
+ hlines.append('')
+ hlines.append('')
+
+ return '\n'.join(hlines) + '\n'
+
+
+class TelegramSpoiler(Extra):
+ '''
+ Handles text enclosed in double pipes, eg: ||spoiler||
+ '''
+ name = 'tg-spoiler'
+ order = (), (Stage.ITALIC_AND_BOLD,)
+
+ # `||`, optional single whitespace, the text, optional single whitespace, `||`.
+ # re.S lets the text span several lines.
+ _tg_spoiler_re = re.compile(r"\|\|\s?(.+?)\s?\|\|", re.S)
+
+ def run(self, text):
+ # NOTE(review): the replacement r"\1" only removes the pipes and wraps the text
+ # in nothing. Presumably spoiler markup was lost from this literal -
+ # confirm against the upstream file.
+ return self._tg_spoiler_re.sub(r"\1", text)
+
+ def test(self, text):
+ # cheap pre-check: no `||`, nothing to do
+ return '||' in text
+
+
+class Underline(Extra):
+ '''
+ Text inside of double dash is --underlined--.
+ '''
+ name = 'underline'
+ order = (Stage.ITALIC_AND_BOLD,), ()
+
+ # NOTE(review): `(?)` is not a valid group construct for `re`, so this pattern looks
+ # damaged. Presumably the `--` delimiters and surrounding assertions were lost -
+ # restore the pattern from the upstream file.
+ _underline_re = re.compile(r"(?)(?=\S)(.+?)(?<=\S)(?)", re.S)
+
+ def run(self, text):
+ # NOTE(review): the replacement r"\1" wraps the captured text in nothing -
+ # presumably underline markup was lost here as well; confirm upstream.
+ return self._underline_re.sub(r"\1", text)
+
+ def test(self, text):
+ # cheap pre-check: no `--`, nothing to do
+ return '--' in text
+
+
+class _WavedromExtraOpts(TypedDict, total=False):
+ '''Options for the `Wavedrom` extra. Declared with `total=False`, so every key may be omitted.'''
+ prefer_embed_svg: bool
+ '''
+ Use the `wavedrom` library to convert diagrams to SVGs and embed them directly.
+ This will only work if the `wavedrom` library has been installed.
+
+ Defaults to `True`
+ '''
+
+
+class Wavedrom(Extra):
+ '''
+ Support for generating Wavedrom digital timing diagrams
+ '''
+ name = 'wavedrom'
+ order = (Stage.CODE_BLOCKS, FencedCodeBlocks), ()
+ options: _WavedromExtraOpts
+
+ def test(self, text):
+ # Only the first fenced code block in `text` is inspected (`search` stops at the
+ # first match). The extra is reported as applicable when there is no fenced block
+ # at all, or when group 2 of that first block equals 'wavedrom'.
+ # NOTE(review): a 'wavedrom' block that follows a different fenced block is not
+ # detected here, and text without any fenced block returns True - confirm both
+ # are intended.
+ match = FencedCodeBlocks.fenced_code_block_re.search(text)
+ return match is None or match.group(2) == 'wavedrom'
+
+ def sub(self, match: re.Match) -> str:
+ # dedent the block for processing
+ lead_indent, waves = self.md._uniform_outdent(match.group(3))
+ # default tags to wrap the wavedrom block in
+ open_tag, close_tag = ''
+
+ # check if the user would prefer to have the SVG embedded directly
+ embed_svg = self.options.get('prefer_embed_svg', True)
+
+ if embed_svg:
+ try:
+ import wavedrom
+ waves = wavedrom.render(waves).tostring()
+ open_tag, close_tag = '
', '\n
'
+ except ImportError:
+ pass
+
+ # hash SVG to prevent <> chars being messed with
+ self.md._escape_table[waves] = _hash_text(waves)
+
+ return self.md._uniform_indent(
+ '\n{}{}{}\n'.format(open_tag, self.md._escape_table[waves], close_tag),
+ lead_indent, include_empty_lines=True
+ )
+
+ def run(self, text):
+ # every match of the fenced code block pattern is passed to `self.sub`
+ return FencedCodeBlocks.fenced_code_block_re.sub(self.sub, text)
+
+
+class WikiTables(Extra):
+ '''
+ Google Code Wiki-style tables. See
+ .
+ '''
+ name = 'wiki-tables'
+ order = (Tables,), ()
+
+ def run(self, text):
+ # Replace every wiki-style table (consecutive lines of the form `||...||`)
+ # with the output of `self.sub`.
+ # a row may be indented by at most one space less than a full tab
+ less_than_tab = self.md.tab_width - 1
+ wiki_table_re = re.compile(r'''
+ (?:(?<=\n\n)|\A\n?) # leading blank line
+ ^([ ]{0,%d})\|\|.+?\|\|[ ]*\n # first line
+ (^\1\|\|.+?\|\|\n)* # any number of subsequent lines
+ ''' % less_than_tab, re.M | re.X)
+ return wiki_table_re.sub(self.sub, text)
+
+ def sub(self, match: re.Match) -> str:
+ ttext = match.group(0).strip()
+ rows = []
+ for line in ttext.splitlines(0):
+ line = line.strip()[2:-2].strip()
+ row = [c.strip() for c in re.split(r'(?' % self.md._html_class_str_from_tag('table'))
+ # Check if first cell of first row is a header cell. If so, assume the whole row is a header row.
+ if rows and rows[0] and re.match(r"^\s*~", rows[0][0]):
+ add_hline('' % self.md._html_class_str_from_tag('thead'), 1)
+ add_hline('
', 2)
+ for cell in rows[0]:
+ add_hline(f"
{format_cell(cell)}
", 3)
+ add_hline('
', 2)
+ add_hline('', 1)
+ # Only one header row allowed.
+ rows = rows[1:]
+ # If no more rows, don't create a tbody.
+ if rows:
+ add_hline('', 1)
+ for row in rows:
+ add_hline('
', 2)
+ for cell in row:
+ add_hline(f'
{format_cell(cell)}
', 3)
+ add_hline('
', 2)
+ add_hline('', 1)
+ add_hline('')
+ return '\n'.join(hlines) + '\n'
+
+    def test(self, text):
+        '''Cheap pre-check: a wiki table always contains the `||` cell delimiter.'''
+        has_delimiter = text.find('||') != -1
+        return has_delimiter
+
+
+# Register extras. The order of this tuple is the registration order.
+for _extra_cls in (
+    Admonitions,
+    Alerts,
+    Breaks,
+    CodeFriendly,
+    FencedCodeBlocks,
+    Latex,
+    LinkPatterns,
+    MarkdownInHTML,
+    MarkdownFileLinks,
+    MiddleWordEm,
+    Mermaid,
+    Numbering,
+    PyShell,
+    SmartyPants,
+    Strike,
+    Tables,
+    TelegramSpoiler,
+    Underline,
+    Wavedrom,
+    WikiTables,
+):
+    _extra_cls.register()
+# don't leave the loop variable behind in the module namespace
+del _extra_cls
+
+
+# ----------------------------------------------------------
+
+
+# ---- internal support functions
+
+
+def calculate_toc_html(toc: Union[list[tuple[int, str, str]], None]) -> Optional[str]:
+ """Return the HTML for the current TOC.
+
+ This expects the `_toc` attribute to have been set on this instance.
+ """
+ if toc is None:
+ return None
+
+ def indent():
+ return ' ' * (len(h_stack) - 1)
+ lines = []
+ h_stack = [0] # stack of header-level numbers
+ for level, id, name in toc:
+ if level > h_stack[-1]:
+ lines.append("%s
diff --git a/test/markdowntest-cases/Strong and em together.html b/test/markdowntest-cases/Strong and em together.html
index 71ec78c7..bab1b98f 100644
--- a/test/markdowntest-cases/Strong and em together.html
+++ b/test/markdowntest-cases/Strong and em together.html
@@ -1,7 +1,7 @@
-
+"""
+ self.assertEqual(expected_toc_html, md.convert(html).toc_html)
+ # Do it again, to check if the toc_html is just appended rather than replaced
+ self.assertEqual(expected_toc_html, md.convert(html).toc_html)
+ # Create different html, and confirm toc_html is replaced
+ html = """
+# I'm new html
+## I don't have to be long, just different
+"""
+ expected_toc_html = """
+"""
+ self.assertEqual(expected_toc_html, md.convert(html).toc_html)
+ test_toc_with_persistent_object.tags = ["toc", "issue208"]
+
class DocTestsTestCase(unittest.TestCase):
def test_api(self):
test = doctest.DocFileTest("api.doctests")
test.runTest()
- # Don't bother on Python 3 because (a) there aren't many inline doctests,
- # and (b) they are more to be didactic than comprehensive test suites.
- if not py3:
- def test_internal(self):
- doctest.testmod(markdown2)
-
-
#---- internal support stuff
@@ -311,16 +326,16 @@ def test_internal(self):
def _xml_escape_sub(match):
escape = match.group(1)
if escape[0] == 'x':
- return unichr(int('0'+escape, base=16))
+ return chr(int('0'+escape, base=16))
else:
- return unichr(int(escape))
+ return chr(int(escape))
_markdown_email_link_re = re.compile(r'(.*?)', re.U)
def _markdown_email_link_sub(match):
href, text = match.groups()
href = _xml_escape_re.sub(_xml_escape_sub, href)
text = _xml_escape_re.sub(_xml_escape_sub, text)
- return '%s' % (href, text)
+ return '{}'.format(href, text)
def norm_html_from_html(html):
"""Normalize (somewhat) Markdown'd HTML.
@@ -330,18 +345,17 @@ def norm_html_from_html(html):
Also normalize EOLs.
"""
- if not isinstance(html, unicode):
+ if not isinstance(html, str):
html = html.decode('utf-8')
html = _markdown_email_link_re.sub(
_markdown_email_link_sub, html)
- if sys.platform == "win32":
- html = html.replace('\r\n', '\n')
+ html = html.replace('\r\n', '\n')
return html
def _display(s):
"""Markup the given string for useful display."""
- if not isinstance(s, unicode):
+ if not isinstance(s, str):
s = s.decode("utf-8")
s = _indent(_escaped_text_from_text(s, "whitespace"), 4)
if not s.endswith('\n'):
@@ -488,7 +502,7 @@ def _escaped_text_from_text(text, escapes="eol"):
# - Add _escaped_html_from_text() with a similar call sig.
import re
- if isinstance(escapes, base_string_type):
+ if isinstance(escapes, str):
if escapes == "eol":
escapes = {'\r\n': "\\r\\n\r\n", '\n': "\\n\n", '\r': "\\r\r"}
elif escapes == "whitespace":
diff --git a/test/test_redos.py b/test/test_redos.py
new file mode 100644
index 00000000..180ce931
--- /dev/null
+++ b/test/test_redos.py
@@ -0,0 +1,90 @@
+import logging
+import subprocess
+import sys
+import time
+from pathlib import Path
+
+log = logging.getLogger("test")
+LIB_DIR = Path(__file__).parent.parent / "lib"
+
+
+def pull_387_example_1():
+ # https://github.com/trentm/python-markdown2/pull/387
+ return "[#a" + " " * 3456
+
+
+def pull_387_example_2():
+ # https://github.com/trentm/python-markdown2/pull/387
+ return "```" + "\n" * 3456
+
+
+def pull_387_example_3():
+ # https://github.com/trentm/python-markdown2/pull/387
+ return "-*-" + " " * 3456
+
+
+def pull_402():
+ # https://github.com/trentm/python-markdown2/pull/402
+ return " " * 100_000 + "$"
+
+
+def issue493():
+ # https://github.com/trentm/python-markdown2/issues/493
+ return "**_" + "*_" * 38730 * 10 + "\x00"
+
+
+def issue_633():
+ # https://github.com/trentm/python-markdown2/issues/633
+ return '
max_time + tolerance:
- raise DurationError(('Test was too long (%.2f s)'
- % total_time))
+ raise DurationError('Test was too long (%.2f s)'
+ % total_time)
return wrapper
return _timedtest
@@ -144,7 +140,7 @@ def wrapper(*args, **kw):
#---- module api
-class Test(object):
+class Test:
def __init__(self, ns, testmod, testcase, testfn_name,
testsuite_class=None):
self.ns = ns
@@ -238,13 +234,12 @@ def testmods_from_testdir(testdir):
testmod_name = splitext(basename(testmod_path))[0]
log.debug("import test module '%s'", testmod_path)
try:
- iinfo = imp.find_module(testmod_name, [dirname(testmod_path)])
testabsdir = abspath(testdir)
sys.path.insert(0, testabsdir)
old_dir = os.getcwd()
os.chdir(testdir)
try:
- testmod = imp.load_module(testmod_name, *iinfo)
+ testmod = importlib.import_module(testmod_name)
finally:
os.chdir(old_dir)
sys.path.remove(testabsdir)
@@ -444,7 +439,7 @@ def list_tests(testdir_from_ns, tags):
if testfile.endswith(".pyc"):
testfile = testfile[:-1]
print("%s:" % t.shortname())
- print(" from: %s#%s.%s" % (testfile,
+ print(" from: {}#{}.{}".format(testfile,
t.testcase.__class__.__name__, t.testfn_name))
wrapped = textwrap.fill(' '.join(t.tags()), WIDTH-10)
print(" tags: %s" % _indent(wrapped, 8, True))
@@ -475,7 +470,7 @@ def __init__(self, stream):
def getDescription(self, test):
if test._testlib_explicit_tags_:
- return "%s [%s]" % (test._testlib_shortname_,
+ return "{} [{}]".format(test._testlib_shortname_,
', '.join(test._testlib_explicit_tags_))
else:
return test._testlib_shortname_
@@ -519,7 +514,7 @@ def printErrorList(self, flavour, errors):
self.stream.write("%s\n" % err)
-class ConsoleTestRunner(object):
+class ConsoleTestRunner:
"""A test runner class that displays results on the console.
It prints out the names of tests as they are run, errors as they
diff --git a/test/tm-cases/admonitions.html b/test/tm-cases/admonitions.html
new file mode 100644
index 00000000..e8a7d05e
--- /dev/null
+++ b/test/tm-cases/admonitions.html
@@ -0,0 +1,53 @@
+
+
+
+
+
Otherwise the text is no longer part of the admonition.
+
+
+
+
print('In case you wanted something like')
+print('an indented code block right after')
+
+
+
diff --git a/test/tm-cases/admonitions.opts b/test/tm-cases/admonitions.opts
new file mode 100644
index 00000000..44e237fc
--- /dev/null
+++ b/test/tm-cases/admonitions.opts
@@ -0,0 +1 @@
+{"extras": ["admonitions"]}
\ No newline at end of file
diff --git a/test/tm-cases/admonitions.text b/test/tm-cases/admonitions.text
new file mode 100644
index 00000000..b79ff8a1
--- /dev/null
+++ b/test/tm-cases/admonitions.text
@@ -0,0 +1,38 @@
+.. NOTE:: Admonitions
+ They contain 3 main parts, the admonition type, title and body.
+
+ The admonition type is case insensitive, title is optional and the body
+ should be able to contain pretty much anything. For example:
+
+ - Lists
+ - With multiple levels
+ - Of indentation
+
+ And code blocks:
+
+ print('indented code blocks')
+
+.. warning::
+ The admonition's body must be indented by a tab or 3 or more spaces
+ from where the admonition was declared
+ Otherwise the text is no longer part of the admonition.
+
+.. IMPORTANT::
+ You can also use 3 or more empty lines after an admonition
+ to end it
+
+
+
+ print('In case you wanted something like')
+ print('an indented code block right after')
+
+.. admonition:: Generic admonitions
+
+ These should be given a title but this is not enforced
+
+ .. note:: Nested admonitions
+ Nested admonitions should also work
+
+ - Even inside
+ .. tip::
+ of a list
\ No newline at end of file
diff --git a/test/tm-cases/admonitions_with_fenced_code_blocks.html b/test/tm-cases/admonitions_with_fenced_code_blocks.html
new file mode 100644
index 00000000..7428c571
--- /dev/null
+++ b/test/tm-cases/admonitions_with_fenced_code_blocks.html
@@ -0,0 +1,35 @@
+
+
+
+
+
+
# admonitions WITHIN fenced code blocks should NOT be rendered
+..attention::title
+ body
+
+
diff --git a/test/tm-cases/admonitions_with_fenced_code_blocks.opts b/test/tm-cases/admonitions_with_fenced_code_blocks.opts
new file mode 100644
index 00000000..145a5305
--- /dev/null
+++ b/test/tm-cases/admonitions_with_fenced_code_blocks.opts
@@ -0,0 +1 @@
+{"extras": ["admonitions", "fenced-code-blocks", "pygments"]}
\ No newline at end of file
diff --git a/test/tm-cases/admonitions_with_fenced_code_blocks.tags b/test/tm-cases/admonitions_with_fenced_code_blocks.tags
new file mode 100644
index 00000000..f0d7f9c4
--- /dev/null
+++ b/test/tm-cases/admonitions_with_fenced_code_blocks.tags
@@ -0,0 +1 @@
+extra admonitions fenced-code-blocks pygments
diff --git a/test/tm-cases/admonitions_with_fenced_code_blocks.text b/test/tm-cases/admonitions_with_fenced_code_blocks.text
new file mode 100644
index 00000000..9a04bee2
--- /dev/null
+++ b/test/tm-cases/admonitions_with_fenced_code_blocks.text
@@ -0,0 +1,23 @@
+.. note::
+ Admonitions are able to contain fenced code blocks
+ ```python
+ print('like so')
+ ```
+
+.. warning::
+ ```python
+ print('Consecutive blocks should also be fine')
+ ```
+ ```python
+ print('Even though fenced code blocks wrap themselves in newlines')
+ ```
+ .. hint:: It should also work nested
+ ```python
+ print('ok')
+ ```
+
+```python
+# admonitions WITHIN fenced code blocks should NOT be rendered
+.. attention:: title
+ body
+```
\ No newline at end of file
diff --git a/test/tm-cases/alerts.html b/test/tm-cases/alerts.html
new file mode 100644
index 00000000..c5d957b3
--- /dev/null
+++ b/test/tm-cases/alerts.html
@@ -0,0 +1,24 @@
+
+Note
+
Useful information that users should know, even when skimming content.
+
+
+
+Tip
+
Helpful advice for doing things better or more easily.
+
+
+
+Important
+
Key information users need to know to achieve their goal.
+
+
+
+Warning
+
Urgent info that needs immediate user attention to avoid problems.
+
+
+
+Caution
+
Advises about risks or negative outcomes of certain actions.
+
diff --git a/test/tm-cases/alerts.opts b/test/tm-cases/alerts.opts
new file mode 100644
index 00000000..2913a414
--- /dev/null
+++ b/test/tm-cases/alerts.opts
@@ -0,0 +1 @@
+{"extras": ["alerts"]}
diff --git a/test/tm-cases/alerts.text b/test/tm-cases/alerts.text
new file mode 100644
index 00000000..b00ffbdd
--- /dev/null
+++ b/test/tm-cases/alerts.text
@@ -0,0 +1,15 @@
+> [!NOTE]
+> Useful information that users should know, even when skimming content.
+
+> [!TIP]
+> Helpful advice for doing things better or more easily.
+
+> [!IMPORTANT]
+> Key information users need to know to achieve their goal.
+
+> [!WARNING]
+> Urgent info that needs immediate user attention to avoid problems.
+
+> [!CAUTION]
+>
+> Advises about risks or negative outcomes of certain actions.
diff --git a/test/tm-cases/backslash_escape_empty_links.html b/test/tm-cases/backslash_escape_empty_links.html
new file mode 100644
index 00000000..bffeaa51
--- /dev/null
+++ b/test/tm-cases/backslash_escape_empty_links.html
@@ -0,0 +1,4 @@
+
how about text \<strong>with triple\</strong> escapes
+
+
escaped auto-link <https://www.example.com>
+not quite escaped auto link \https://www.example.com
+escaped auto-link \<https://www.example.com>
+
+
<!-- and escaped HTML comment --> \ \<!--and another that is-->
diff --git a/test/tm-cases/backslash_escape_html_tags.text b/test/tm-cases/backslash_escape_html_tags.text
new file mode 100644
index 00000000..e26bf2b5
--- /dev/null
+++ b/test/tm-cases/backslash_escape_html_tags.text
@@ -0,0 +1,13 @@
+this is \some strong\ text
+
+[\](http://localhost/)text\
+
+text \\with double\\ escapes
+
+how about text \\\with triple\\\ escapes
+
+escaped auto-link \
+not quite escaped auto link \\
+escaped auto-link \\\
+
+\ \\ \\\
\ No newline at end of file
diff --git a/test/tm-cases/backslash_removed_by_adjacent_backtick.html b/test/tm-cases/backslash_removed_by_adjacent_backtick.html
new file mode 100644
index 00000000..1091b1a7
--- /dev/null
+++ b/test/tm-cases/backslash_removed_by_adjacent_backtick.html
@@ -0,0 +1,7 @@
+
hello \world
+
+
hello \world my favourite letter is w
+
+
hello \world my favourite code is import pickle
+
+
hello \world my favourite letter is x
diff --git a/test/tm-cases/backslash_removed_by_adjacent_backtick.text b/test/tm-cases/backslash_removed_by_adjacent_backtick.text
new file mode 100644
index 00000000..4136c904
--- /dev/null
+++ b/test/tm-cases/backslash_removed_by_adjacent_backtick.text
@@ -0,0 +1,7 @@
+hello \world
+
+hello \world my favourite letter is `w`
+
+hello \world my favourite code is `import pickle`
+
+hello \world my favourite letter is `x`
\ No newline at end of file
diff --git a/test/tm-cases/basic_safe_mode.html b/test/tm-cases/basic_safe_mode.html
index 435a3329..60051078 100644
--- a/test/tm-cases/basic_safe_mode.html
+++ b/test/tm-cases/basic_safe_mode.html
@@ -6,13 +6,13 @@