diff --git a/markdown_it/_doc_renderer.py b/markdown_it/_doc_renderer.py deleted file mode 100644 index 1957b31c..00000000 --- a/markdown_it/_doc_renderer.py +++ /dev/null @@ -1,328 +0,0 @@ -"""NOTE: this will eventually be moved out of core""" -from contextlib import contextmanager -import json -from typing import List - -import yaml - -from docutils import nodes -from docutils.frontend import OptionParser - -# from docutils.languages import get_language -# from docutils.parsers.rst import directives, Directive, DirectiveError, roles -from docutils.parsers.rst import Parser as RSTParser - -# from docutils.parsers.rst.directives.misc import Include -# from docutils.parsers.rst.states import RSTStateMachine, Body, Inliner -# from docutils.statemachine import StringList -from docutils.utils import new_document, Reporter # noqa - -from markdown_it.token import Token, nest_tokens - - -def make_document(source_path="notset") -> nodes.document: - """Create a new docutils document.""" - settings = OptionParser(components=(RSTParser,)).get_default_values() - return new_document(source_path, settings=settings) - - -class DocRenderer: - __output__ = "docutils" - - def __init__(self, options=None, env=None): - self.options = options or {} - self.env = env or {} - self.rules = { - k: v - for k, v in self.__class__.__dict__.items() - if k.startswith("render_") and k != "render_children" - } - self.document = make_document() - self.current_node = self.document - self.config = {} - self._level_to_elem = {0: self.document} - - def run_render(self, tokens: List[Token]): - tokens = nest_tokens(tokens) - for i, token in enumerate(tokens): - if f"render_{token.type}" in self.rules: - self.rules[f"render_{token.type}"](self, token) - else: - print(f"no render method for: {token.type}") - - @contextmanager - def current_node_context(self, node, append: bool = False): - """Context manager for temporarily setting the current node.""" - if append: - self.current_node.append(node) - current_node = self.current_node - self.current_node = node - yield - self.current_node = current_node - - def render_children(self, token): - for i, child in enumerate(token.children or []): - if f"render_{child.type}" in self.rules: - self.rules[f"render_{child.type}"](self, child) - else: - print(f"no render method for: {child.type}") - - def add_line_and_source_path(self, node, token): - """Copy the line number and document source path to the docutils node.""" - try: - node.line = token.map[0] + 1 - except (AttributeError, TypeError): - pass - node.source = self.document["source"] - - def _is_section_level(self, level, section): - return self._level_to_elem.get(level, None) == section - - def _add_section(self, section, level): - parent_level = max( - section_level - for section_level in self._level_to_elem - if level > section_level - ) - parent = self._level_to_elem[parent_level] - parent.append(section) - self._level_to_elem[level] = section - - # Prune level to limit - self._level_to_elem = dict( - (section_level, section) - for section_level, section in self._level_to_elem.items() - if section_level <= level - ) - - def renderInlineAsText(self, tokens: List[Token]) -> str: - """Special kludge for image `alt` attributes to conform CommonMark spec. - - Don't try to use it! Spec requires to show `alt` content with stripped markup, - instead of simple escaping. 
- """ - result = "" - - for token in tokens or []: - if token.type == "text": - result += token.content - # elif token.type == "image": - # result += self.renderInlineAsText(token.children) - else: - result += self.renderInlineAsText(token.children) - - return result - - # ### render methods for tokens - - def render_paragraph_open(self, token): - para = nodes.paragraph("") - self.add_line_and_source_path(para, token) - with self.current_node_context(para, append=True): - self.render_children(token) - - def render_inline(self, token): - self.render_children(token) - - def render_text(self, token): - self.current_node.append(nodes.Text(token.content, token.content)) - - def render_bullet_list_open(self, token): - list_node = nodes.bullet_list() - self.add_line_and_source_path(list_node, token) - with self.current_node_context(list_node, append=True): - self.render_children(token) - - def render_list_item_open(self, token): - item_node = nodes.list_item() - self.add_line_and_source_path(item_node, token) - with self.current_node_context(item_node, append=True): - self.render_children(token) - - def render_em_open(self, token): - node = nodes.emphasis() - self.add_line_and_source_path(node, token) - with self.current_node_context(node, append=True): - self.render_children(token) - - def render_softbreak(self, token): - self.current_node.append(nodes.Text("\n")) - - def render_strong_open(self, token): - node = nodes.strong() - self.add_line_and_source_path(node, token) - with self.current_node_context(node, append=True): - self.render_children(token) - - def render_blockquote_open(self, token): - quote = nodes.block_quote() - self.add_line_and_source_path(quote, token) - with self.current_node_context(quote, append=True): - self.render_children(token) - - def render_hr(self, token): - node = nodes.transition() - self.add_line_and_source_path(node, token) - self.current_node.append(node) - - def render_code_inline(self, token): - node = nodes.literal(token.content, token.content) - self.add_line_and_source_path(node, token) - self.current_node.append(node) - - def render_fence(self, token): - text = token.content - language = token.info.split()[0] - if not language: - try: - sphinx_env = self.document.settings.env - language = sphinx_env.temp_data.get( - "highlight_language", sphinx_env.config.highlight_language - ) - except AttributeError: - pass - if not language: - language = self.config.get("highlight_language", "") - node = nodes.literal_block(text, text, language=language) - self.add_line_and_source_path(node, token) - self.current_node.append(node) - - def render_heading_open(self, token): - # Test if we're replacing a section level first - - level = int(token.tag[1]) - if isinstance(self.current_node, nodes.section): - if self._is_section_level(level, self.current_node): - self.current_node = self.current_node.parent - - title_node = nodes.title() - self.add_line_and_source_path(title_node, token) - - new_section = nodes.section() - self.add_line_and_source_path(new_section, token) - new_section.append(title_node) - - self._add_section(new_section, level) - - self.current_node = title_node - self.render_children(token) - - assert isinstance(self.current_node, nodes.title) - text = self.current_node.astext() - # if self.translate_section_name: - # text = self.translate_section_name(text) - name = nodes.fully_normalize_name(text) - section = self.current_node.parent - section["names"].append(name) - self.document.note_implicit_target(section, section) - self.current_node = section - - 
def render_link_open(self, token): - # TODO I think this is maybe already handled at this point? - # refuri = escape_url(http://github.com/token.target) - refuri = target = token.attrGet("href") - ref_node = nodes.reference(target, target, refuri=refuri) - self.add_line_and_source_path(ref_node, token) - self.current_node.append(ref_node) - - def render_html_inline(self, token): - self.current_node.append(nodes.raw("", token.content, format="html")) - - def render_html_block(self, token): - self.current_node.append(nodes.raw("", token.content, format="html")) - - def render_image(self, token): - img_node = nodes.image() - self.add_line_and_source_path(img_node, token) - img_node["uri"] = token.attrGet("src") - # TODO ideally we would render proper markup here - img_node["alt"] = self.renderInlineAsText(token.children) - - self.current_node.append(img_node) - - def render_front_matter(self, token): - """Pass document front matter data - - For RST, all field lists are captured by - ``docutils.docutils.parsers.rst.states.Body.field_marker``, - then, if one occurs at the document, it is transformed by - `docutils.docutils.transforms.frontmatter.DocInfo`, and finally - this is intercepted by sphinx and added to the env in - `sphinx.environment.collectors.metadata.MetadataCollector.process_doc` - - So technically the values should be parsed to AST, but this is redundant, - since `process_doc` just converts them back to text. - - """ - try: - data = yaml.safe_load(token.content) - except (yaml.parser.ParserError, yaml.scanner.ScannerError) as error: - msg_node = self.reporter.error( - "Front matter block:\n" + str(error), line=token.map[0] - ) - msg_node += nodes.literal_block(token.content, token.content) - self.current_node += [msg_node] - return - - docinfo = dict_to_docinfo(data) - self.current_node.append(docinfo) - - def render_myst_block_break(self, token): - block_break = nodes.comment(token.content, token.content) - block_break["classes"] += ["block_break"] - self.add_line_and_source_path(block_break, token) - self.current_node.append(block_break) - - def render_myst_target(self, token): - text = token.content - name = nodes.fully_normalize_name(text) - target = nodes.target(text) - target["names"].append(name) - self.add_line_and_source_path(target, token) - self.document.note_explicit_target(target, self.current_node) - self.current_node.append(target) - - def render_myst_role(self, token): - - name = token.meta["name"] - # TODO representing as literal for place-holder - content = f":{name}:`{token.content}`" - node = nodes.literal(content, content) - self.add_line_and_source_path(node, token) - self.current_node.append(node) - - # def render_table_open(self, token): - # # print(token) - # # raise - - # table = nodes.table() - # table["classes"] += ["colwidths-auto"] - # self.add_line_and_source_path(table, token) - - # thead = nodes.thead() - # # TODO there can never be more than one header row (at least in mardown-it) - # header = token.children[0].children[0] - # for hrow in header.children: - # nodes.t - # style = hrow.attrGet("style") - - # tgroup = nodes.tgroup(cols) - # table += tgroup - # tgroup += thead - - -def dict_to_docinfo(data): - """Render a key/val pair as a docutils field node.""" - # TODO this data could be used to support default option values for directives - docinfo = nodes.docinfo() - - for key, value in data.items(): - if not isinstance(value, (str, int, float)): - value = json.dumps(value) - value = str(value) - field_node = nodes.field() - 
field_node.source = value - field_node += nodes.field_name(key, "", nodes.Text(key, key)) - field_node += nodes.field_body(value, nodes.Text(value, value)) - docinfo += field_node - return docinfo diff --git a/markdown_it/main.py b/markdown_it/main.py index a747f863..b40a166b 100644 --- a/markdown_it/main.py +++ b/markdown_it/main.py @@ -1,3 +1,4 @@ +from contextlib import contextmanager from typing import Any, Callable, Dict, List, Optional, Union from . import helpers, presets # noqa F401 @@ -107,10 +108,12 @@ def configure(self, presets: Union[str, AttrDict]): def get_active_rules(self) -> Dict[str, List[str]]: """Return the names of all active rules.""" - return { + rules = { chain: self[chain].ruler.get_active_rules() for chain in ["core", "block", "inline"] } + rules["inline2"] = self.inline.ruler2.get_active_rules() + return rules def enable( self, names: Union[str, List[str]], ignoreInvalid: bool = False @@ -168,6 +171,15 @@ def disable( raise ValueError(f"MarkdownIt. Failed to disable unknown rule(s): {missed}") return self + @contextmanager + def reset_rules(self): + chain_rules = self.get_active_rules() + yield + for chain, rules in chain_rules.items(): + if chain != "inline2": + self[chain].ruler.enableOnly(rules) + self.inline.ruler2.enableOnly(chain_rules["inline2"]) + def add_render_rule(self, name: str, function: Callable, fmt="html"): """Add a rule for rendering a particular Token type. @@ -243,7 +255,7 @@ def parseInline(self, src: str, env: Optional[AttrDict] = None) -> List[Token]: raise TypeError(f"Input data should be an AttrDict, not {type(env)}") if not isinstance(src, str): raise TypeError(f"Input data should be a string, not {type(src)}") - state = self.core.State(src, self, env) + state = StateCore(src, self, env) state.inlineMode = True self.core.process(state) return state.tokens diff --git a/markdown_it/myst/doc_renderer.py b/markdown_it/myst/doc_renderer.py new file mode 100644 index 00000000..0201782d --- /dev/null +++ b/markdown_it/myst/doc_renderer.py @@ -0,0 +1,644 @@ +"""NOTE: this will eventually be moved out of core""" +from contextlib import contextmanager +import inspect +import json +from os.path import splitext +from typing import List, Optional +from urllib.parse import urlparse + +import yaml + +from docutils import nodes +from docutils.frontend import OptionParser + +from docutils.languages import get_language +from docutils.parsers.rst import directives, Directive, DirectiveError, roles +from docutils.parsers.rst import Parser as RSTParser +from docutils.parsers.rst.directives.misc import Include +from docutils.statemachine import StringList +from docutils.utils import new_document, Reporter + +from markdown_it import MarkdownIt +from markdown_it.token import Token, nest_tokens +from markdown_it.utils import AttrDict +from markdown_it.common.utils import escapeHtml + +from .mocking import ( + MockInliner, + MockState, + MockStateMachine, + MockingError, + MockIncludeDirective, +) +from .parse_directives import parse_directive_text, DirectiveParsingError + + +def make_document(source_path="notset") -> nodes.document: + """Create a new docutils document.""" + settings = OptionParser(components=(RSTParser,)).get_default_values() + return new_document(source_path, settings=settings) + + +class DocutilsRenderer: + __output__ = "docutils" + + def __init__( + self, + md: MarkdownIt, + options=None, + document: Optional[nodes.document] = None, + current_node: Optional[nodes.Element] = None, + ): + self.md = md + self.options = options or {} + 
self.rules = { + k: v + for k, v in inspect.getmembers(self, predicate=inspect.ismethod) + if k.startswith("render_") and k != "render_children" + } + self.document = document or make_document() + self.reporter = self.document.reporter # type: Reporter + self.current_node = current_node or self.document + self.language_module = self.document.settings.language_code # type: str + get_language(self.language_module) + # TODO merge these with self.env? + self.config = {} + self._level_to_elem = {0: self.document} + + def run_render(self, tokens: List[Token], env: AttrDict, ouput_footnotes=True): + """Run the render on a token stream. + + :param tokens: the token stream + :param env: the environment sandbox associated with the tokens, + containing additional metadata like reference info + """ + self.env = env + + # propagate line number down to inline elements + for token in tokens: + for child in token.children or []: + child.map = token.map + + # nest tokens + tokens = nest_tokens(tokens) + + # move footnote definitions to env + self.env.setdefault("foot_refs", []) + new_tokens = [] + for token in tokens: + if token.type == "footnote_reference_open": + self.env["foot_refs"].append(token) + else: + new_tokens.append(token) + tokens = new_tokens + + # render + for i, token in enumerate(tokens): + # skip hidden? + if f"render_{token.type}" in self.rules: + self.rules[f"render_{token.type}"](token) + else: + print(f"no render method for: {token.type}") + + # TODO log warning for duplicate references + + if not ouput_footnotes: + return self.document + + # add footnotes + referenced = { + v["label"] for v in self.env.get("footnotes", {}).get("list", {}).values() + } + # only output referenced + foot_refs = [f for f in self.env["foot_refs"] if f.meta["label"] in referenced] + + if foot_refs: + self.current_node.append(nodes.transition()) + for footref in foot_refs: # TODO sort by referenced + self.render_footnote_reference_open(footref) + + return self.document + + def nested_render_text(self, text: str, lineno: int): + """Render unparsed text.""" + + # parse without front matter + with self.md.reset_rules(): + self.md.disable("front_matter", True) + tokens = self.md.parse(text, self.env) + + # set correct line numbers + for token in tokens: + if token.map: + token.map = [token.map[0] + lineno, token.map[1] + lineno] + for child in token.children or []: + child.map = token.map + + # nest tokens + tokens = nest_tokens(tokens) + + # move footnote definitions to env + self.env.setdefault("foot_refs", []) + new_tokens = [] + for token in tokens: + if token.type == "footnote_reference_open": + self.env["foot_refs"].append(token) + else: + new_tokens.append(token) + tokens = new_tokens + + # render + for i, token in enumerate(tokens): + # skip hidden? 
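+ # dispatch on token type: tokens map to the bound render_<type> methods collected in __init__ (unknown types just print a warning)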
+ if f"render_{token.type}" in self.rules: + self.rules[f"render_{token.type}"](token) + else: + print(f"no render method for: {token.type}") + + @contextmanager + def current_node_context(self, node, append: bool = False): + """Context manager for temporarily setting the current node.""" + if append: + self.current_node.append(node) + current_node = self.current_node + self.current_node = node + yield + self.current_node = current_node + + def render_children(self, token): + for i, child in enumerate(token.children or []): + if f"render_{child.type}" in self.rules: + self.rules[f"render_{child.type}"](child) + else: + print(f"no render method for: {child.type}") + + def add_line_and_source_path(self, node, token): + """Copy the line number and document source path to the docutils node.""" + try: + node.line = token.map[0] + 1 + except (AttributeError, TypeError): + pass + node.source = self.document["source"] + + def _is_section_level(self, level, section): + return self._level_to_elem.get(level, None) == section + + def _add_section(self, section, level): + parent_level = max( + section_level + for section_level in self._level_to_elem + if level > section_level + ) + parent = self._level_to_elem[parent_level] + parent.append(section) + self._level_to_elem[level] = section + + # Prune level to limit + self._level_to_elem = dict( + (section_level, section) + for section_level, section in self._level_to_elem.items() + if section_level <= level + ) + + def renderInlineAsText(self, tokens: List[Token]) -> str: + """Special kludge for image `alt` attributes to conform CommonMark spec. + + Don't try to use it! Spec requires to show `alt` content with stripped markup, + instead of simple escaping. + """ + result = "" + + for token in tokens or []: + if token.type == "text": + result += token.content + # elif token.type == "image": + # result += self.renderInlineAsText(token.children) + else: + result += self.renderInlineAsText(token.children) + + return result + + # ### render methods for commonmark tokens + + def render_paragraph_open(self, token): + para = nodes.paragraph("") + self.add_line_and_source_path(para, token) + with self.current_node_context(para, append=True): + self.render_children(token) + + def render_inline(self, token): + self.render_children(token) + + def render_text(self, token): + self.current_node.append(nodes.Text(token.content, token.content)) + + def render_bullet_list_open(self, token): + list_node = nodes.bullet_list() + self.add_line_and_source_path(list_node, token) + with self.current_node_context(list_node, append=True): + self.render_children(token) + + def render_ordered_list_open(self, token): + list_node = nodes.enumerated_list() + self.add_line_and_source_path(list_node, token) + with self.current_node_context(list_node, append=True): + self.render_children(token) + + def render_list_item_open(self, token): + item_node = nodes.list_item() + self.add_line_and_source_path(item_node, token) + with self.current_node_context(item_node, append=True): + self.render_children(token) + + def render_em_open(self, token): + node = nodes.emphasis() + self.add_line_and_source_path(node, token) + with self.current_node_context(node, append=True): + self.render_children(token) + + def render_softbreak(self, token): + self.current_node.append(nodes.Text("\n")) + + def render_strong_open(self, token): + node = nodes.strong() + self.add_line_and_source_path(node, token) + with self.current_node_context(node, append=True): + self.render_children(token) + + def 
render_blockquote_open(self, token): + quote = nodes.block_quote() + self.add_line_and_source_path(quote, token) + with self.current_node_context(quote, append=True): + self.render_children(token) + + def render_hr(self, token): + node = nodes.transition() + self.add_line_and_source_path(node, token) + self.current_node.append(node) + + def render_code_inline(self, token): + node = nodes.literal(token.content, token.content) + self.add_line_and_source_path(node, token) + self.current_node.append(node) + + def render_fence(self, token): + text = token.content + language = token.info.split()[0] if token.info else "" + + if language.startswith("{") and language.endswith("}"): + return self.render_directive(token) + + if not language: + try: + sphinx_env = self.document.settings.env + language = sphinx_env.temp_data.get( + "highlight_language", sphinx_env.config.highlight_language + ) + except AttributeError: + pass + if not language: + language = self.config.get("highlight_language", "") + node = nodes.literal_block(text, text, language=language) + self.add_line_and_source_path(node, token) + self.current_node.append(node) + + def render_heading_open(self, token): + # Test if we're replacing a section level first + + level = int(token.tag[1]) + if isinstance(self.current_node, nodes.section): + if self._is_section_level(level, self.current_node): + self.current_node = self.current_node.parent + + title_node = nodes.title() + self.add_line_and_source_path(title_node, token) + + new_section = nodes.section() + self.add_line_and_source_path(new_section, token) + new_section.append(title_node) + + self._add_section(new_section, level) + + self.current_node = title_node + self.render_children(token) + + assert isinstance(self.current_node, nodes.title) + text = self.current_node.astext() + # if self.translate_section_name: + # text = self.translate_section_name(text) + name = nodes.fully_normalize_name(text) + section = self.current_node.parent + section["names"].append(name) + self.document.note_implicit_target(section, section) + self.current_node = section + + def render_link_open(self, token): + if token.markup == "autolink": + return self.render_autolink(token) + + ref_node = nodes.reference() + self.add_line_and_source_path(ref_node, token) + # Check destination is supported for cross-linking and remove extension + # TODO escape urls? + destination = token.attrGet("href") + title = token.attrGet("title") + _, ext = splitext(destination) + # TODO check for other supported extensions, such as those specified in + # the Sphinx conf.py file but how to access this information? + # TODO this should probably only remove the extension for local paths, + # i.e. not uri's starting with http or other external prefix. + + # if ext.replace('.', '') in self.supported: + # destination = destination.replace(ext, '') + ref_node["refuri"] = destination + print(token) + if title: + ref_node["title"] = title + next_node = ref_node + + url_check = urlparse(destination) + # If there's not a url scheme (e.g. 'https' for 'https:...' 
links), + # or there is a scheme but it's not in the list of known_url_schemes, + # then assume it's a cross-reference + known_url_schemes = self.config.get("known_url_schemes", None) + if known_url_schemes: + scheme_known = url_check.scheme in known_url_schemes + else: + scheme_known = bool(url_check.scheme) + + if not url_check.fragment and not scheme_known: + self.handle_cross_reference(token, destination) + else: + self.current_node.append(next_node) + with self.current_node_context(ref_node): + self.render_children(token) + + def handle_cross_reference(self, token, destination): + # TODO use the docutils error reporting mechanisms, rather than raising + if not self.config.get("ignore_missing_refs", False): + raise NotImplementedError( + "reference not found in current document: {} (lines: {})".format( + destination, token.map + ) + ) + + def render_autolink(self, token): + refuri = target = escapeHtml(token.attrGet("href")) + ref_node = nodes.reference(target, target, refuri=refuri) + self.add_line_and_source_path(ref_node, token) + self.current_node.append(ref_node) + + def render_html_inline(self, token): + self.current_node.append(nodes.raw("", token.content, format="html")) + + def render_html_block(self, token): + self.current_node.append(nodes.raw("", token.content, format="html")) + + def render_image(self, token): + img_node = nodes.image() + self.add_line_and_source_path(img_node, token) + img_node["uri"] = token.attrGet("src") + # TODO ideally we would render proper markup here + img_node["alt"] = self.renderInlineAsText(token.children) + + self.current_node.append(img_node) + + # ### render methods for plugin tokens + + def render_front_matter(self, token): + """Pass document front matter data + + For RST, all field lists are captured by + ``docutils.docutils.parsers.rst.states.Body.field_marker``, + then, if one occurs at the document, it is transformed by + `docutils.docutils.transforms.frontmatter.DocInfo`, and finally + this is intercepted by sphinx and added to the env in + `sphinx.environment.collectors.metadata.MetadataCollector.process_doc` + + So technically the values should be parsed to AST, but this is redundant, + since `process_doc` just converts them back to text. 
+ + """ + try: + data = yaml.safe_load(token.content) + except (yaml.parser.ParserError, yaml.scanner.ScannerError) as error: + msg_node = self.reporter.error( + "Front matter block:\n" + str(error), line=token.map[0] + ) + msg_node += nodes.literal_block(token.content, token.content) + self.current_node += [msg_node] + return + + docinfo = dict_to_docinfo(data) + self.current_node.append(docinfo) + + # def render_table_open(self, token): + # # print(token) + # # raise + + # table = nodes.table() + # table["classes"] += ["colwidths-auto"] + # self.add_line_and_source_path(table, token) + + # thead = nodes.thead() + # # TODO there can never be more than one header row (at least in mardown-it) + # header = token.children[0].children[0] + # for hrow in header.children: + # nodes.t + # style = hrow.attrGet("style") + + # tgroup = nodes.tgroup(cols) + # table += tgroup + # tgroup += thead + + def render_math_inline(self, token): + content = token.content + node = nodes.math(content, content) + self.add_line_and_source_path(node, token) + self.current_node.append(node) + + def render_math_block(self, token): + content = token.content + node = nodes.math_block(content, content, nowrap=False, number=None) + self.add_line_and_source_path(node, token) + self.current_node.append(node) + + def render_footnote_ref(self, token): + """Footnote references are added as auto-numbered, + .i.e. `[^a]` is read as rST `[#a]_` + """ + # TODO we now also have ^[a] the inline version (currently disabled) + # that would be rendered here + target = token.meta["label"] + refnode = nodes.footnote_reference("[^{}]".format(target)) + self.add_line_and_source_path(refnode, token) + refnode["auto"] = 1 + refnode["refname"] = target + # refnode += nodes.Text(token.target) + self.document.note_autofootnote_ref(refnode) + self.document.note_footnote_ref(refnode) + self.current_node.append(refnode) + + def render_footnote_reference_open(self, token): + target = token.meta["label"] + footnote = nodes.footnote() + self.add_line_and_source_path(footnote, token) + footnote["names"].append(target) + footnote["auto"] = 1 + self.document.note_autofootnote(footnote) + self.document.note_explicit_target(footnote, footnote) + with self.current_node_context(footnote, append=True): + self.render_children(token) + + def render_myst_block_break(self, token): + block_break = nodes.comment(token.content, token.content) + block_break["classes"] += ["block_break"] + self.add_line_and_source_path(block_break, token) + self.current_node.append(block_break) + + def render_myst_target(self, token): + text = token.content + name = nodes.fully_normalize_name(text) + target = nodes.target(text) + target["names"].append(name) + self.add_line_and_source_path(target, token) + self.document.note_explicit_target(target, self.current_node) + self.current_node.append(target) + + def render_myst_line_comment(self, token): + self.current_node.append(nodes.comment(token.content, token.content)) + + def render_myst_role(self, token): + name = token.meta["name"] + text = escapeHtml(token.content) # TODO check this + rawsource = f":{name}:`{token.content}`" + lineno = token.map[0] if token.map else 0 + role_func, messages = roles.role( + name, self.language_module, lineno, self.reporter + ) + inliner = MockInliner(self, lineno) + if role_func: + nodes, messages2 = role_func(name, rawsource, text, lineno, inliner) + # return nodes, messages + messages2 + self.current_node += nodes + else: + message = self.reporter.error( + 'Unknown interpreted text role 
"{}".'.format(name), line=lineno + ) + problematic = inliner.problematic(text, rawsource, message) + self.current_node += problematic + + def render_directive(self, token: Token): + """Render special fenced code blocks as directives.""" + first_line = token.info.split(maxsplit=1) + name = first_line[0][1:-1] + arguments = "" if len(first_line) == 1 else first_line[1] + # TODO directive name white/black lists + content = token.content + position = token.map[0] + self.document.current_line = position + + # get directive class + directive_class, messages = directives.directive( + name, self.language_module, self.document + ) # type: (Directive, list) + if not directive_class: + error = self.reporter.error( + "Unknown directive type '{}'\n".format(name), + # nodes.literal_block(content, content), + line=position, + ) + self.current_node += [error] + messages + return + + try: + arguments, options, body_lines = parse_directive_text( + directive_class, arguments, content + ) + except DirectiveParsingError as error: + error = self.reporter.error( + "Directive '{}':\n{}".format(name, error), + nodes.literal_block(content, content), + line=position, + ) + self.current_node += [error] + return + + # initialise directive + if issubclass(directive_class, Include): + directive_instance = MockIncludeDirective( + self, + name=name, + klass=directive_class, + arguments=arguments, + options=options, + body=body_lines, + token=token, + ) + else: + state_machine = MockStateMachine(self, position) + state = MockState(self, state_machine, position) + directive_instance = directive_class( + name=name, + # the list of positional arguments + arguments=arguments, + # a dictionary mapping option names to values + options=options, + # the directive content line by line + content=StringList(body_lines, self.document["source"]), + # the absolute line number of the first line of the directive + lineno=position, + # the line offset of the first line of the content + content_offset=0, # TODO get content offset from `parse_directive_text` + # a string containing the entire directive + block_text="\n".join(body_lines), + state=state, + state_machine=state_machine, + ) + + # run directive + try: + result = directive_instance.run() + except DirectiveError as error: + msg_node = self.reporter.system_message( + error.level, error.msg, line=position + ) + msg_node += nodes.literal_block(content, content) + result = [msg_node] + except MockingError as exc: + error = self.reporter.error( + "Directive '{}' cannot be mocked:\n{}: {}".format( + name, exc.__class__.__name__, exc + ), + nodes.literal_block(content, content), + line=position, + ) + self.current_node += [error] + return + assert isinstance( + result, list + ), 'Directive "{}" must return a list of nodes.'.format(name) + for i in range(len(result)): + assert isinstance( + result[i], nodes.Node + ), 'Directive "{}" returned non-Node object (index {}): {}'.format( + name, i, result[i] + ) + self.current_node += result + + +def dict_to_docinfo(data): + """Render a key/val pair as a docutils field node.""" + # TODO this data could be used to support default option values for directives + docinfo = nodes.docinfo() + + for key, value in data.items(): + if not isinstance(value, (str, int, float)): + value = json.dumps(value) + value = str(value) + field_node = nodes.field() + field_node.source = value + field_node += nodes.field_name(key, "", nodes.Text(key, key)) + field_node += nodes.field_body(value, nodes.Text(value, value)) + docinfo += field_node + return docinfo diff 
--git a/markdown_it/myst/mocking.py b/markdown_it/myst/mocking.py new file mode 100644 index 00000000..af045ed7 --- /dev/null +++ b/markdown_it/myst/mocking.py @@ -0,0 +1,395 @@ +from pathlib import Path +import re +import sys +from typing import List, Optional + +from docutils import nodes +from docutils.parsers.rst.states import Inliner, RSTStateMachine, Body +from docutils.parsers.rst import DirectiveError +from docutils.parsers.rst.directives.misc import Include +from docutils.statemachine import StringList + + +class MockingError(Exception): + """An exception to signal an error during mocking of docutils components.""" + + +class MockInliner: + """A mock version of `docutils.parsers.rst.states.Inliner`. + + This is parsed to role functions. + """ + + def __init__(self, renderer, lineno: int): + self._renderer = renderer + self.document = renderer.document + self.reporter = renderer.document.reporter + if not hasattr(self.reporter, "get_source_and_line"): + # TODO this is called by some roles, + # but I can't see how that would work in RST? + self.reporter.get_source_and_line = lambda l: (self.document["source"], l) + self.parent = renderer.current_node + self.language = renderer.language_module + self.rfc_url = "rfc%d.html" + + def problematic(self, text: str, rawsource: str, message: nodes.system_message): + msgid = self.document.set_id(message, self.parent) + problematic = nodes.problematic(rawsource, rawsource, refid=msgid) + prbid = self.document.set_id(problematic) + message.add_backref(prbid) + return problematic + + # TODO add parse method + + def __getattr__(self, name): + """This method is only be called if the attribute requested has not + been defined. Defined attributes will not be overridden. + """ + # TODO use document.reporter mechanism? + if hasattr(Inliner, name): + msg = "{cls} has not yet implemented attribute '{name}'".format( + cls=type(self).__name__, name=name + ) + raise MockingError(msg).with_traceback(sys.exc_info()[2]) + msg = "{cls} has no attribute {name}".format(cls=type(self).__name__, name=name) + raise MockingError(msg).with_traceback(sys.exc_info()[2]) + + +class MockState: + """A mock version of `docutils.parsers.rst.states.RSTState`. + + This is parsed to the `Directives.run()` method, + so that they may run nested parses on their content that will be parsed as markdown, + rather than RST. + """ + + def __init__(self, renderer, state_machine: "MockStateMachine", lineno: int): + self._renderer = renderer + self._lineno = lineno + self.document = renderer.document + self.state_machine = state_machine + + class Struct: + document = self.document + reporter = self.document.reporter + language = self.document.settings.language_code + title_styles = [] + section_level = max(renderer._level_to_elem) + section_bubble_up_kludge = False + inliner = MockInliner(renderer, lineno) + + self.memo = Struct + + def nested_parse( + self, + block: StringList, + input_offset: int, + node: nodes.Element, + match_titles: bool = False, + state_machine_class=None, + state_machine_kwargs=None, + ): + current_match_titles = self.state_machine.match_titles + self.state_machine.match_titles = match_titles + with self._renderer.current_node_context(node): + self._renderer.nested_render_text( + "\n".join(block), self._lineno + input_offset + ) + self.state_machine.match_titles = current_match_titles + + def inline_text(self, text: str, lineno: int): + # TODO return messages? 
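+ # parse the text as inline Markdown, then render the tokens with a fresh DocutilsRenderer into a detached paragraph and return its children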
+ messages = [] + paragraph = nodes.paragraph("") + + tokens = self._renderer.md.parseInline(text, self._renderer.env) + for token in tokens: + if token.map: + token.map = [token.map[0] + lineno, token.map[1] + lineno] + + # here we instantiate a new renderer, + # so that the nested parse does not effect the current renderer, + # but we use the same env, so that link references, etc + # are added to the global parse. + from .doc_renderer import DocutilsRenderer + + nested_renderer = DocutilsRenderer( + self._renderer.md, document=self.document, current_node=paragraph + ) + nested_renderer.run_render(tokens, self._renderer.env, ouput_footnotes=False) + return paragraph.children, messages + + # U+2014 is an em-dash: + attribution_pattern = re.compile("^((?:---?(?!-)|\u2014) *)(.+)") + + def block_quote(self, lines: List[str], line_offset: int): + """Parse a block quote, which is a block of text, + followed by an (optional) attribution. + + :: + + No matter where you go, there you are. + + -- Buckaroo Banzai + """ + elements = [] + # split attribution + last_line_blank = False + blockquote_lines = lines + attribution_lines = [] + attribution_line_offset = None + # First line after a blank line must begin with a dash + for i, line in enumerate(lines): + if not line.strip(): + last_line_blank = True + continue + if not last_line_blank: + last_line_blank = False + continue + last_line_blank = False + match = self.attribution_pattern.match(line) + if not match: + continue + attribution_line_offset = i + attribution_lines = [match.group(2)] + for at_line in lines[i + 1 :]: + indented_line = at_line[len(match.group(1)) :] + if len(indented_line) != len(at_line.lstrip()): + break + attribution_lines.append(indented_line) + blockquote_lines = lines[:i] + break + # parse block + blockquote = nodes.block_quote() + self.nested_parse(blockquote_lines, line_offset, blockquote) + elements.append(blockquote) + # parse attribution + if attribution_lines: + attribution_text = "\n".join(attribution_lines) + lineno = self._lineno + line_offset + attribution_line_offset + textnodes, messages = self.inline_text(attribution_text, lineno) + attribution = nodes.attribution(attribution_text, "", *textnodes) + ( + attribution.source, + attribution.line, + ) = self.state_machine.get_source_and_line(lineno) + blockquote += attribution + elements += messages + return elements + + def build_table(self, tabledata, tableline, stub_columns=0, widths=None): + return Body.build_table(self, tabledata, tableline, stub_columns, widths) + + def build_table_row(self, rowdata, tableline): + return Body.build_table_row(self, rowdata, tableline) + + def __getattr__(self, name): + """This method is only be called if the attribute requested has not + been defined. Defined attributes will not be overridden. + """ + if hasattr(Body, name): + msg = "{cls} has not yet implemented attribute '{name}'".format( + cls=type(self).__name__, name=name + ) + raise MockingError(msg).with_traceback(sys.exc_info()[2]) + msg = "{cls} has no attribute {name}".format(cls=type(self).__name__, name=name) + raise MockingError(msg).with_traceback(sys.exc_info()[2]) + + +class MockStateMachine: + """A mock version of `docutils.parsers.rst.states.RSTStateMachine`. + + This is parsed to the `Directives.run()` method. 
+ """ + + def __init__(self, renderer, lineno: int): + self._renderer = renderer + self._lineno = lineno + self.document = renderer.document + self.reporter = self.document.reporter + self.node = renderer.current_node + self.match_titles = True + + def get_source(self, lineno: Optional[int] = None): + """Return document source path.""" + return self.document["source"] + + def get_source_and_line(self, lineno: Optional[int] = None): + """Return (source path, line) tuple for current or given line number.""" + return self.document["source"], lineno or self._lineno + + def __getattr__(self, name): + """This method is only be called if the attribute requested has not + been defined. Defined attributes will not be overridden. + """ + if hasattr(RSTStateMachine, name): + msg = "{cls} has not yet implemented attribute '{name}'".format( + cls=type(self).__name__, name=name + ) + raise MockingError(msg).with_traceback(sys.exc_info()[2]) + msg = "{cls} has no attribute {name}".format(cls=type(self).__name__, name=name) + raise MockingError(msg).with_traceback(sys.exc_info()[2]) + + +class MockIncludeDirective: + """This directive uses a lot of statemachine logic that is not yet mocked. + Therefore, we treat it as a special case (at least for now). + + See: + https://docutils.sourceforge.io/docs/ref/rst/directives.html#including-an-external-document-fragment + """ + + def __init__( + self, + renderer, + name: str, + klass: Include, + arguments: list, + options: dict, + body: List[str], + token, + ): + self.renderer = renderer + self.document = renderer.document + self.name = name + self.klass = klass + self.arguments = arguments + self.options = options + self.body = body + self.lineno = token.position.line_start + self.token = token + + def run(self): + + from docutils.parsers.rst.directives.body import CodeBlock, NumberLines + + if not self.document.settings.file_insertion_enabled: + raise DirectiveError(2, 'Directive "{}" disabled.'.format(self.name)) + + source_dir = Path(self.document["source"]).absolute().parent + include_arg = "".join([s.strip() for s in self.arguments[0].splitlines()]) + + if include_arg.startswith("<") and include_arg.endswith(">"): + # # docutils "standard" includes + path = Path(self.klass.standard_include_path).joinpath(include_arg[1:-1]) + else: + # if using sphinx interpret absolute paths "correctly", + # i.e. 
relative to source directory + try: + sphinx_env = self.document.settings.env + _, include_arg = sphinx_env.relfn2path(self.arguments[0]) + sphinx_env.note_included(include_arg) + except AttributeError: + pass + path = Path(include_arg) + path = source_dir.joinpath(path) + + # read file + encoding = self.options.get("encoding", self.document.settings.input_encoding) + error_handler = self.document.settings.input_encoding_error_handler + # tab_width = self.options.get("tab-width", self.document.settings.tab_width) + try: + file_content = path.read_text(encoding=encoding, errors=error_handler) + except Exception as error: + raise DirectiveError( + 4, + 'Directive "{}": error reading file: {}\n{error}.'.format( + self.name, path, error + ), + ) + + # get required section of text + startline = self.options.get("start-line", None) + endline = self.options.get("end-line", None) + file_content = "\n".join(file_content.splitlines()[startline:endline]) + startline = startline or 0 + for split_on_type in ["start-after", "end-before"]: + split_on = self.options.get(split_on_type, None) + if not split_on: + continue + split_index = file_content.find(split_on) + if split_index < 0: + raise DirectiveError( + 4, + 'Directive "{}"; option "{}": text not found "{}".'.format( + self.name, split_on_type, split_on + ), + ) + if split_on_type == "start-after": + startline += split_index + len(split_on) + file_content = file_content[split_index + len(split_on) :] + else: + file_content = file_content[:split_index] + + if "literal" in self.options: + literal_block = nodes.literal_block( + file_content, source=str(path), classes=self.options.get("class", []) + ) + literal_block.line = 1 # TODO don;t think this should be 1? + self.add_name(literal_block) + if "number-lines" in self.options: + try: + startline = int(self.options["number-lines"] or 1) + except ValueError: + raise DirectiveError( + 3, ":number-lines: with non-integer " "start value" + ) + endline = startline + len(file_content.splitlines()) + if file_content.endswith("\n"): + file_content = file_content[:-1] + tokens = NumberLines([([], file_content)], startline, endline) + for classes, value in tokens: + if classes: + literal_block += nodes.inline(value, value, classes=classes) + else: + literal_block += nodes.Text(value) + else: + literal_block += nodes.Text(file_content) + return [literal_block] + if "code" in self.options: + self.options["source"] = str(path) + state_machine = MockStateMachine(self.renderer, self.lineno) + state = MockState(self.renderer, state_machine, self.lineno) + codeblock = CodeBlock( + name=self.name, + arguments=[self.options.pop("code")], + options=self.options, + content=file_content.splitlines(), + lineno=self.lineno, + content_offset=0, + block_text=file_content, + state=state, + state_machine=state_machine, + ) + return codeblock.run() + + # Here we perform a nested render, but temporarily setup the document/reporter + # with the correct document path and lineno for the included file. 
+ source = self.renderer.document["source"] + rsource = self.renderer.reporter.source + line_func = getattr(self.renderer.reporter, "get_source_and_line", None) + try: + self.renderer.document["source"] = str(path) + self.renderer.reporter.source = str(path) + self.renderer.reporter.get_source_and_line = lambda l: (str(path), l) + self.renderer.nested_render_text(file_content, startline) + finally: + self.renderer.document["source"] = source + self.renderer.reporter.source = rsource + if line_func is not None: + self.renderer.reporter.get_source_and_line = line_func + else: + del self.renderer.reporter.get_source_and_line + return [] + + def add_name(self, node): + """Append self.options['name'] to node['names'] if it exists. + + Also normalize the name string and register it as explicit target. + """ + if "name" in self.options: + name = nodes.fully_normalize_name(self.options.pop("name")) + if "name" in node: + del node["name"] + node["names"].append(name) + self.renderer.document.note_explicit_target(node, node) diff --git a/markdown_it/myst/parse_directives.py b/markdown_it/myst/parse_directives.py new file mode 100644 index 00000000..5d0366b9 --- /dev/null +++ b/markdown_it/myst/parse_directives.py @@ -0,0 +1,170 @@ +"""Fenced code blocks are parsed as directives, +if the block starts with ``{directive_name}``, +followed by arguments on the same line. + +Directive options are read from a YAML block, +if the first content line starts with ``---``, e.g. + +:: + + ```{directive_name} arguments + --- + option1: name + option2: | + Longer text block + --- + content... + ``` + +Or the option block will be parsed if the first content line starts with ``:``, +as a YAML block consisting of every line that starts with a ``:``, e.g. + +:: + + ```{directive_name} arguments + :option1: name + :option2: other + + content... + ``` + +If the first line of a directive's content is blank, this will be stripped +from the content. +This is to allow for separation between the option block and content. 
+ +""" +import re +from textwrap import dedent +from typing import Callable, Dict, Type + +import yaml + +from docutils.parsers.rst import Directive +from docutils.parsers.rst.directives.misc import TestDirective + + +class DirectiveParsingError(Exception): + """Raise on parsing/validation error.""" + + pass + + +def parse_directive_text( + directive_class: Type[Directive], + argument_str: str, + content: str, + validate_options: bool = True, +): + """Parse (and validate) the full directive text.""" + if directive_class.option_spec: + body, options = parse_directive_options( + content, directive_class, validate=validate_options + ) + else: + # If there are no possible options, we do not look for a YAML block + options = {} + body = content + + body_lines = body.splitlines() + + if not ( + directive_class.required_arguments + or directive_class.optional_arguments + or options + ): + # If there are no possible arguments and no option block, + # then the body starts on the argument line + if argument_str: + body_lines.insert(0, argument_str) + arguments = [] + else: + arguments = parse_directive_arguments(directive_class, argument_str) + + # remove first line of body if blank + # this is to allow space between the options and the content + if body_lines and not body_lines[0].strip(): + body_lines = body_lines[1:] + + # check for body content + if body_lines and not directive_class.has_content: + raise DirectiveParsingError("No content permitted") + + return arguments, options, body_lines + + +def parse_directive_options( + content: str, directive_class: Type[Directive], validate: bool = True +): + """Parse (and validate) the directive option section.""" + options = {} + if content.startswith("---"): + content = "\n".join(content.splitlines()[1:]) + match = re.search(r"^-{3,}", content, re.MULTILINE) + if match: + yaml_block = content[: match.start()] + content = content[match.end() + 1 :] # TODO advance line number + else: + yaml_block = content + content = "" + yaml_block = dedent(yaml_block) + try: + options = yaml.safe_load(yaml_block) or {} + except (yaml.parser.ParserError, yaml.scanner.ScannerError) as error: + raise DirectiveParsingError("Invalid options YAML: " + str(error)) + elif content.lstrip().startswith(":"): + content_lines = content.splitlines() # type: list + yaml_lines = [] + while content_lines: + if not content_lines[0].lstrip().startswith(":"): + break + yaml_lines.append(content_lines.pop(0).lstrip()[1:]) + yaml_block = "\n".join(yaml_lines) + content = "\n".join(content_lines) + try: + options = yaml.safe_load(yaml_block) or {} + except (yaml.parser.ParserError, yaml.scanner.ScannerError) as error: + raise DirectiveParsingError("Invalid options YAML: " + str(error)) + + if (not validate) or issubclass(directive_class, TestDirective): + # technically this directive spec only accepts one option ('option') + # but since its for testing only we accept all options + return content, options + + # check options against spec + options_spec = directive_class.option_spec # type: Dict[str, Callable] + for name, value in list(options.items()): + convertor = options_spec.get(name, None) + if convertor is None: + raise DirectiveParsingError("Unknown option: {}".format(name)) + try: + converted_value = convertor(value) + except (ValueError, TypeError) as error: + raise DirectiveParsingError( + "Invalid option value: (option: '{}'; value: {})\n{}".format( + name, value, error + ) + ) + options[name] = converted_value + + return content, options + + +def 
parse_directive_arguments(directive, arg_text): + """Parse (and validate) the directive argument section.""" + required = directive.required_arguments + optional = directive.optional_arguments + arguments = arg_text.split() + if len(arguments) < required: + raise DirectiveParsingError( + "{} argument(s) required, {} supplied".format(required, len(arguments)) + ) + elif len(arguments) > required + optional: + if directive.final_argument_whitespace: + arguments = arg_text.split(None, required + optional - 1) + else: + raise DirectiveParsingError( + "maximum {} argument(s) allowed, {} supplied".format( + required + optional, len(arguments) + ) + ) + return arguments diff --git a/markdown_it/myst/sphinx_renderer.py b/markdown_it/myst/sphinx_renderer.py new file mode 100644 index 00000000..a16da70a --- /dev/null +++ b/markdown_it/myst/sphinx_renderer.py @@ -0,0 +1,161 @@ +import copy +from urllib.parse import unquote + +from docutils import nodes +from docutils.parsers.rst import directives, roles + +from .doc_renderer import DocutilsRenderer + + +class SphinxRenderer(DocutilsRenderer): + """A mistletoe renderer to populate (in-place) a `docutils.document` AST. + + This is sub-class of `DocutilsRenderer` that handles sphinx cross-referencing. + """ + + def __init__(self, *args, **kwargs): + """Initialise SphinxRenderer + + :param load_sphinx_env: load a basic sphinx environment, + when using the renderer as a context manager outside if `sphinx-build` + :param sphinx_conf: a dictionary representation of the sphinx `conf.py` + :param sphinx_srcdir: a path to a source directory + (for example, can be used for `include` statements) + + To use this renderer in a 'standalone' fashion:: + + from myst_parser.block_tokens import Document + + with SphinxRenderer(load_sphinx_env=True, sphinx_conf={}) as renderer: + renderer.render(Document.read("source text")) + + """ + self.load_sphinx_env = kwargs.pop("load_sphinx_env", False) + self.sphinx_conf = kwargs.pop("sphinx_conf", None) + self.sphinx_srcdir = kwargs.pop("sphinx_srcdir", None) + super().__init__(*args, **kwargs) + + def handle_cross_reference(self, token, destination): + from sphinx import addnodes + + wrap_node = addnodes.pending_xref( + reftarget=unquote(destination), + reftype="any", + refdomain=None, # Added to enable cross-linking + refexplicit=len(token.children) > 0, + refwarn=True, + ) + self.add_line_and_source_path(wrap_node, token) + title = token.attrGet("title") + if title: + wrap_node["title"] = title + self.current_node.append(wrap_node) + text_node = nodes.literal("", "", classes=["xref", "any"]) + wrap_node.append(text_node) + with self.current_node_context(text_node): + self.render_children(token) + + def mock_sphinx_env(self, configuration=None, sourcedir=None): + """Create a minimimal Sphinx environment; + loading sphinx roles, directives, etc. 
+ """ + from sphinx.application import builtin_extensions, Sphinx + from sphinx.config import Config + from sphinx.environment import BuildEnvironment + from sphinx.events import EventManager + from sphinx.project import Project + from sphinx.registry import SphinxComponentRegistry + from sphinx.util.tags import Tags + + class MockSphinx(Sphinx): + """Minimal sphinx init to load roles and directives.""" + + def __init__(self, confoverrides=None, srcdir=None): + self.extensions = {} + self.registry = SphinxComponentRegistry() + self.html_themes = {} + self.events = EventManager(self) + self.tags = Tags(None) + self.config = Config({}, confoverrides or {}) + self.config.pre_init_values() + self._init_i18n() + for extension in builtin_extensions: + self.registry.load_extension(self, extension) + # fresh env + self.doctreedir = None + self.srcdir = srcdir + self.confdir = None + self.outdir = None + self.project = Project(srcdir=srcdir, source_suffix=".md") + self.project.docnames = ["mock_docname"] + self.env = BuildEnvironment() + self.env.setup(self) + self.env.temp_data["docname"] = "mock_docname" + self.builder = None + + if not confoverrides: + return + + # this code is only required for more complex parsing with extensions + for extension in self.config.extensions: + self.setup_extension(extension) + buildername = "dummy" + self.preload_builder(buildername) + self.config.init_values() + self.events.emit("config-inited", self.config) + import tempfile + + with tempfile.TemporaryDirectory() as tempdir: + # creating a builder attempts to make the doctreedir + self.doctreedir = tempdir + self.builder = self.create_builder(buildername) + self.doctreedir = None + + app = MockSphinx(confoverrides=configuration, srcdir=sourcedir) + self.document.settings.env = app.env + return app + + def __enter__(self): + """If `load_sphinx_env=True`, we set up an environment, + to parse sphinx roles/directives, outside of a `sphinx-build`. + + This primarily copies the code in `sphinx.util.docutils.docutils_namespace` + and `sphinx.util.docutils.sphinx_domains`. + """ + if not self.load_sphinx_env: + return super().__enter__() + + # store currently loaded roles/directives, so we can revert on exit + self._directives = copy.copy(directives._directives) + self._roles = copy.copy(roles._roles) + # Monkey-patch directive and role dispatch, + # so that sphinx domain-specific markup takes precedence. 
+ self._env = self.mock_sphinx_env( + configuration=self.sphinx_conf, sourcedir=self.sphinx_srcdir + ).env + from sphinx.util.docutils import sphinx_domains + + self._sphinx_domains = sphinx_domains(self._env) + self._sphinx_domains.enable() + + return super().__enter__() + + def __exit__(self, exception_type, exception_val, traceback): + if not self.load_sphinx_env: + return super().__exit__(exception_type, exception_val, traceback) + # revert loaded roles/directives + directives._directives = self._directives + roles._roles = self._roles + self._directives = None + self._roles = None + # unregister nodes (see `sphinx.util.docutils.docutils_namespace`) + from sphinx.util.docutils import additional_nodes, unregister_node + + for node in list(additional_nodes): + unregister_node(node) + additional_nodes.discard(node) + # revert directive/role function (see `sphinx.util.docutils.sphinx_domains`) + self._sphinx_domains.disable() + self._sphinx_domains = None + self._env = None + return super().__exit__(exception_type, exception_val, traceback) diff --git a/markdown_it/rules_core/block.py b/markdown_it/rules_core/block.py index 8b991fd4..0fb490b0 100644 --- a/markdown_it/rules_core/block.py +++ b/markdown_it/rules_core/block.py @@ -9,6 +9,6 @@ def block(state: StateCore): token.content = state.src token.map = [0, 1] token.children = [] - state.tokens.push(token) + state.tokens.append(token) else: state.md.block.parse(state.src, state.md, state.env, state.tokens) diff --git a/try_running_renderer.py b/try_running_renderer.py index 3c08fb14..8a92ed22 100644 --- a/try_running_renderer.py +++ b/try_running_renderer.py @@ -1,61 +1,100 @@ -if __name__ == "__main__": - - from markdown_it import MarkdownIt - from markdown_it.extensions.front_matter import front_matter_plugin - from markdown_it.extensions.myst_blocks import myst_block_plugin - from markdown_it.extensions.myst_role import myst_role_plugin - from markdown_it.doc_renderer import DocRenderer - - md = ( - MarkdownIt() - .use(front_matter_plugin) - .use(myst_block_plugin) - .use(myst_role_plugin) - ) - tokens = md.parse( - """\ - --- - a: 1 - b: - - c - --- - (xyz)= - # title - a - - b *c* **g** - - h - d - > +++ - --- - ` a ` - ```a dfg - mj - ``` - ## a - - abc - === - - - - [a][b] - - [b]: s - -
A
- - a a - - ![a *A*](b) - - +++ axbc - - {role-name:}`abc` - """ - ) - - # print(get_nested(tokens)) - - doc = DocRenderer() - doc.run_render(tokens) - print(doc.document.pformat()) +from markdown_it import MarkdownIt +from markdown_it.utils import AttrDict +from markdown_it.extensions.front_matter import front_matter_plugin +from markdown_it.extensions.myst_blocks import myst_block_plugin +from markdown_it.extensions.myst_role import myst_role_plugin +from markdown_it.extensions.texmath import texmath_plugin +from markdown_it.extensions.footnote import footnote_plugin +from markdown_it.myst.sphinx_renderer import SphinxRenderer + +md = ( + MarkdownIt() + .enable("table") + .use(front_matter_plugin) + .use(myst_block_plugin) + .use(myst_role_plugin) + .use(texmath_plugin) + .use(footnote_plugin) + .disable("footnote_inline") + # disable this for now, because it need a new implementation in the renderer + .disable("footnote_tail") + # we don't want to yet remove un-referenced, because they may be referenced + # in admonition type directives + # we need to do our own post process to gather them + # (and also add nodes.transition() above) +) +env = AttrDict() +tokens = md.parse( + """\ +--- +a: 1 +b: + - c +--- +(xyz)= +# title +a +- b *c* **g** + - h +d +> +++ +--- +` a ` +```a dfg +mj +``` +## a + +abc +=== + + + +[a][b] + +[b]: s "a" + +
A
+ +a a + +![a *A*](b) + ++++ axbc + +{sub}`abc` + + +|a|*b* | +|-|--: | + +% whatever *abc* + +$a=1$ + +$$xyz=3$$ + +[^foot]: 123 +asdas asdasda + +[^foot] + +1. 345 + +````{note} +my title +```{contents} abc +``` +[^dfg] +[^dfg]: xyc +```` +[dvs]: a +[dvs] +""", + env=env, +) + +doc = SphinxRenderer(md) +doc.run_render(tokens, env) +print(len(env["foot_refs"])) +print(doc.document.pformat())
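
Two quick sketches of the new APIs in this patch, for trying them outside of a Sphinx build. First, the `reset_rules` context manager added to `MarkdownIt` in `markdown_it/main.py` snapshots the active rule chains (including the `inline2`/ruler2 entry now reported by `get_active_rules`) and restores them on exit; this is how `nested_render_text` parses nested content with `front_matter` switched off. A minimal sketch, assuming only a plain `MarkdownIt` instance:

```python
from markdown_it import MarkdownIt

md = MarkdownIt()
before = md.get_active_rules()  # now also reports the "inline2" (ruler2) chain

with md.reset_rules():
    # rule changes made here are local to the block
    md.disable("emphasis")
    md.parse("*parsed without emphasis*")

# on exit the previously active rules are re-enabled via enableOnly()
assert md.get_active_rules() == before
```

Second, `parse_directive_text` from `markdown_it/myst/parse_directives.py` can be exercised directly against a stock docutils directive class. This sketch assumes the `markdown_it.myst` package from this diff is importable; the `Note` directive and the `class_option` conversion come straight from docutils. It shows the `:option:`-style block being split off, validated against `option_spec`, and the blank separator line being stripped from the body:

```python
from docutils.parsers.rst.directives.admonitions import Note

from markdown_it.myst.parse_directives import parse_directive_text

# content as it would appear inside a {note} fenced code block
content = ":class: tip\n\nThe *body* of the note."

arguments, options, body_lines = parse_directive_text(Note, "", content)

assert arguments == []                    # note takes no arguments
assert options == {"class": ["tip"]}      # converted by directives.class_option
assert body_lines == ["The *body* of the note."]  # leading blank line stripped
```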