From 581eef1c742e75727f86468e0f67d44bbe5fbce0 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Thu, 26 Mar 2020 09:34:52 +0000 Subject: [PATCH 1/4] update renderer --- markdown_it/_doc_renderer.py | 163 ++++++++++++++++++++++++++++++++--- try_running_renderer.py | 150 +++++++++++++++++++------------- 2 files changed, 239 insertions(+), 74 deletions(-) diff --git a/markdown_it/_doc_renderer.py b/markdown_it/_doc_renderer.py index 1957b31c..b3bb1206 100644 --- a/markdown_it/_doc_renderer.py +++ b/markdown_it/_doc_renderer.py @@ -1,6 +1,7 @@ """NOTE: this will eventually be moved out of core""" from contextlib import contextmanager import json +import sys from typing import List import yaml @@ -8,16 +9,19 @@ from docutils import nodes from docutils.frontend import OptionParser -# from docutils.languages import get_language -# from docutils.parsers.rst import directives, Directive, DirectiveError, roles +from docutils.languages import get_language +from docutils.parsers.rst import roles # directives, Directive, DirectiveError, roles from docutils.parsers.rst import Parser as RSTParser # from docutils.parsers.rst.directives.misc import Include -# from docutils.parsers.rst.states import RSTStateMachine, Body, Inliner +from docutils.parsers.rst.states import Inliner # RSTStateMachine, Body + # from docutils.statemachine import StringList -from docutils.utils import new_document, Reporter # noqa +from docutils.utils import new_document, Reporter from markdown_it.token import Token, nest_tokens +from markdown_it.utils import AttrDict +from markdown_it.common.utils import escapeHtml def make_document(source_path="notset") -> nodes.document: @@ -31,20 +35,41 @@ class DocRenderer: def __init__(self, options=None, env=None): self.options = options or {} - self.env = env or {} + self.env = env or AttrDict() self.rules = { k: v for k, v in self.__class__.__dict__.items() if k.startswith("render_") and k != "render_children" } self.document = make_document() + self.reporter = self.document.reporter # type: Reporter self.current_node = self.document + self.language_module = self.document.settings.language_code # type: str + get_language(self.language_module) + # TODO merge these with self.env? self.config = {} self._level_to_elem = {0: self.document} - def run_render(self, tokens: List[Token]): + def run_render(self, tokens: List[Token], env: AttrDict): + """Run the render on a token stream. + + :param tokens: the token stream + :param env: the environment sandbox associated with the tokens, + containing additional metadata like reference info + """ + self.env = env + last_map = None + # propagate line number down to inline elements + for token in tokens: + if token.map: + last_map = token.map + elif last_map: + token.meta["parent_line"] = last_map[0] + for child in token.children or []: + child.meta["parent_line"] = last_map[0] tokens = nest_tokens(tokens) for i, token in enumerate(tokens): + # skip hidden? if f"render_{token.type}" in self.rules: self.rules[f"render_{token.type}"](self, token) else: @@ -113,7 +138,7 @@ def renderInlineAsText(self, tokens: List[Token]) -> str: return result - # ### render methods for tokens + # ### render methods for commonmark tokens def render_paragraph_open(self, token): para = nodes.paragraph("") @@ -133,6 +158,12 @@ def render_bullet_list_open(self, token): with self.current_node_context(list_node, append=True): self.render_children(token) + def render_ordered_list_open(self, token): + list_node = nodes.enumerated_list() + self.add_line_and_source_path(list_node, token) + with self.current_node_context(list_node, append=True): + self.render_children(token) + def render_list_item_open(self, token): item_node = nodes.list_item() self.add_line_and_source_path(item_node, token) @@ -220,6 +251,7 @@ def render_heading_open(self, token): def render_link_open(self, token): # TODO I think this is maybe already handled at this point? # refuri = escape_url(http://github.com/token.target) + # TODO identify cross-references refuri = target = token.attrGet("href") ref_node = nodes.reference(target, target, refuri=refuri) self.add_line_and_source_path(ref_node, token) @@ -240,6 +272,8 @@ def render_image(self, token): self.current_node.append(img_node) + # ### render methods for plugin tokens + def render_front_matter(self, token): """Pass document front matter data @@ -267,6 +301,45 @@ def render_front_matter(self, token): docinfo = dict_to_docinfo(data) self.current_node.append(docinfo) + def render_math_inline(self, token): + content = token.content + node = nodes.math(content, content) + self.add_line_and_source_path(node, token) + self.current_node.append(node) + + def render_math_block(self, token): + content = token.content + node = nodes.math_block(content, content, nowrap=False, number=None) + self.add_line_and_source_path(node, token) + self.current_node.append(node) + + def render_footnote_ref(self, token): + """Footnote references are added as auto-numbered, + .i.e. `[^a]` is read as rST `[#a]_` + """ + # TODO we now also have ^[a] the inline version (currently disabled) + # that would be rendered here + target = token.meta["label"] + refnode = nodes.footnote_reference("[^{}]".format(target)) + self.add_line_and_source_path(refnode, token) + refnode["auto"] = 1 + refnode["refname"] = target + # refnode += nodes.Text(token.target) + self.document.note_autofootnote_ref(refnode) + self.document.note_footnote_ref(refnode) + self.current_node.append(refnode) + + def render_footnote_reference_open(self, token): + target = token.meta["label"] + footnote = nodes.footnote() + self.add_line_and_source_path(footnote, token) + footnote["names"].append(target) + footnote["auto"] = 1 + self.document.note_autofootnote(footnote) + self.document.note_explicit_target(footnote, footnote) + with self.current_node_context(footnote, append=True): + self.render_children(token) + def render_myst_block_break(self, token): block_break = nodes.comment(token.content, token.content) block_break["classes"] += ["block_break"] @@ -282,14 +355,33 @@ def render_myst_target(self, token): self.document.note_explicit_target(target, self.current_node) self.current_node.append(target) - def render_myst_role(self, token): + def render_myst_line_comment(self, token): + self.current_node.append(nodes.comment(token.content, token.content)) + def render_myst_role(self, token): name = token.meta["name"] - # TODO representing as literal for place-holder - content = f":{name}:`{token.content}`" - node = nodes.literal(content, content) - self.add_line_and_source_path(node, token) - self.current_node.append(node) + text = escapeHtml(token.content) # TODO check this + rawsource = f":{name}:`{token.content}`" + lineno = token.meta.get("parent_line", 0) + role_func, messages = roles.role( + name, self.language_module, lineno, self.reporter + ) + inliner = MockInliner(self, lineno) + if role_func: + nodes, messages2 = role_func(name, rawsource, text, lineno, inliner) + # return nodes, messages + messages2 + self.current_node += nodes + else: + message = self.reporter.error( + 'Unknown interpreted text role "{}".'.format(name), line=lineno + ) + problematic = inliner.problematic(text, rawsource, message) + self.current_node += problematic + + # # TODO representing as literal for place-holder + # node = nodes.literal(rawsource, rawsource) + # self.add_line_and_source_path(node, token) + # self.current_node.append(node) # def render_table_open(self, token): # # print(token) @@ -326,3 +418,48 @@ def dict_to_docinfo(data): field_node += nodes.field_body(value, nodes.Text(value, value)) docinfo += field_node return docinfo + + +class MockingError(Exception): + """An exception to signal an error during mocking of docutils components.""" + + +class MockInliner: + """A mock version of `docutils.parsers.rst.states.Inliner`. + + This is parsed to role functions. + """ + + def __init__(self, renderer: DocRenderer, lineno: int): + self._renderer = renderer + self.document = renderer.document + self.reporter = renderer.document.reporter + if not hasattr(self.reporter, "get_source_and_line"): + # TODO this is called by some roles, + # but I can't see how that would work in RST? + self.reporter.get_source_and_line = lambda l: (self.document["source"], l) + self.parent = renderer.current_node + self.language = renderer.language_module + self.rfc_url = "rfc%d.html" + + def problematic(self, text: str, rawsource: str, message: nodes.system_message): + msgid = self.document.set_id(message, self.parent) + problematic = nodes.problematic(rawsource, rawsource, refid=msgid) + prbid = self.document.set_id(problematic) + message.add_backref(prbid) + return problematic + + # TODO add parse method + + def __getattr__(self, name): + """This method is only be called if the attribute requested has not + been defined. Defined attributes will not be overridden. + """ + # TODO use document.reporter mechanism? + if hasattr(Inliner, name): + msg = "{cls} has not yet implemented attribute '{name}'".format( + cls=type(self).__name__, name=name + ) + raise MockingError(msg).with_traceback(sys.exc_info()[2]) + msg = "{cls} has no attribute {name}".format(cls=type(self).__name__, name=name) + raise MockingError(msg).with_traceback(sys.exc_info()[2]) diff --git a/try_running_renderer.py b/try_running_renderer.py index 3c08fb14..58a14d30 100644 --- a/try_running_renderer.py +++ b/try_running_renderer.py @@ -1,61 +1,89 @@ -if __name__ == "__main__": - - from markdown_it import MarkdownIt - from markdown_it.extensions.front_matter import front_matter_plugin - from markdown_it.extensions.myst_blocks import myst_block_plugin - from markdown_it.extensions.myst_role import myst_role_plugin - from markdown_it.doc_renderer import DocRenderer - - md = ( - MarkdownIt() - .use(front_matter_plugin) - .use(myst_block_plugin) - .use(myst_role_plugin) - ) - tokens = md.parse( - """\ - --- - a: 1 - b: - - c - --- - (xyz)= - # title - a - - b *c* **g** - - h - d - > +++ - --- - ` a ` - ```a dfg - mj - ``` - ## a - - abc - === - - - - [a][b] - - [b]: s - -
A
- - a a - - ![a *A*](b) - - +++ axbc - - {role-name:}`abc` - """ - ) - - # print(get_nested(tokens)) - - doc = DocRenderer() - doc.run_render(tokens) - print(doc.document.pformat()) +from markdown_it import MarkdownIt +from markdown_it.utils import AttrDict +from markdown_it.extensions.front_matter import front_matter_plugin +from markdown_it.extensions.myst_blocks import myst_block_plugin +from markdown_it.extensions.myst_role import myst_role_plugin +from markdown_it.extensions.texmath import texmath_plugin +from markdown_it.extensions.footnote import footnote_plugin +from markdown_it._doc_renderer import DocRenderer + +md = ( + MarkdownIt() + .enable("table") + .use(front_matter_plugin) + .use(myst_block_plugin) + .use(myst_role_plugin) + .use(texmath_plugin) + .use(footnote_plugin) + .disable("footnote_inline") + # disable this for now, because it need a new implementation in the renderer + .disable("footnote_tail") + # we don't want to yet remove un-referenced, because they may be referenced + # in admonition type directives + # we need to do our own post process to gather them + # (and also add nodes.transition() above) +) +env = AttrDict() +tokens = md.parse( + """\ +--- +a: 1 +b: + - c +--- +(xyz)= +# title +a +- b *c* **g** + - h +d +> +++ +--- +` a ` +```a dfg +mj +``` +## a + +abc +=== + + + +[a][b] + +[b]: s + +
A
+ +a a + +![a *A*](b) + ++++ axbc + +{sub}`abc` + + +|a|*b* | +|-|--: | + +% whatever *abc* + +$a=1$ + +$$xyz=3$$ + +[^foot]: 123 +asdas asdasda + +[^foot] + +1. 345 +""", + env=env, +) + +doc = DocRenderer() +doc.run_render(tokens, env) +print(doc.document.pformat()) From 61e69f1fe574722591df718f63228d01f0f425fa Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Thu, 26 Mar 2020 11:30:09 +0000 Subject: [PATCH 2/4] update --- markdown_it/main.py | 16 +- markdown_it/myst/mocking.py | 238 ++++++++++++++++ markdown_it/myst/parse_directives.py | 170 ++++++++++++ .../{_doc_renderer.py => myst/renderer.py} | 258 ++++++++++++------ markdown_it/rules_core/block.py | 2 +- try_running_renderer.py | 12 +- 6 files changed, 611 insertions(+), 85 deletions(-) create mode 100644 markdown_it/myst/mocking.py create mode 100644 markdown_it/myst/parse_directives.py rename markdown_it/{_doc_renderer.py => myst/renderer.py} (71%) diff --git a/markdown_it/main.py b/markdown_it/main.py index a747f863..b40a166b 100644 --- a/markdown_it/main.py +++ b/markdown_it/main.py @@ -1,3 +1,4 @@ +from contextlib import contextmanager from typing import Any, Callable, Dict, List, Optional, Union from . import helpers, presets # noqa F401 @@ -107,10 +108,12 @@ def configure(self, presets: Union[str, AttrDict]): def get_active_rules(self) -> Dict[str, List[str]]: """Return the names of all active rules.""" - return { + rules = { chain: self[chain].ruler.get_active_rules() for chain in ["core", "block", "inline"] } + rules["inline2"] = self.inline.ruler2.get_active_rules() + return rules def enable( self, names: Union[str, List[str]], ignoreInvalid: bool = False @@ -168,6 +171,15 @@ def disable( raise ValueError(f"MarkdownIt. Failed to disable unknown rule(s): {missed}") return self + @contextmanager + def reset_rules(self): + chain_rules = self.get_active_rules() + yield + for chain, rules in chain_rules.items(): + if chain != "inline2": + self[chain].ruler.enableOnly(rules) + self.inline.ruler2.enableOnly(chain_rules["inline2"]) + def add_render_rule(self, name: str, function: Callable, fmt="html"): """Add a rule for rendering a particular Token type. @@ -243,7 +255,7 @@ def parseInline(self, src: str, env: Optional[AttrDict] = None) -> List[Token]: raise TypeError(f"Input data should be an AttrDict, not {type(env)}") if not isinstance(src, str): raise TypeError(f"Input data should be a string, not {type(src)}") - state = self.core.State(src, self, env) + state = StateCore(src, self, env) state.inlineMode = True self.core.process(state) return state.tokens diff --git a/markdown_it/myst/mocking.py b/markdown_it/myst/mocking.py new file mode 100644 index 00000000..a45c01c8 --- /dev/null +++ b/markdown_it/myst/mocking.py @@ -0,0 +1,238 @@ +import re +import sys +from typing import List, Optional + +from docutils import nodes +from docutils.parsers.rst.states import Inliner, RSTStateMachine, Body +from docutils.statemachine import StringList + +# from docutils.parsers.rst.directives.misc import Include + + +class MockingError(Exception): + """An exception to signal an error during mocking of docutils components.""" + + +class MockInliner: + """A mock version of `docutils.parsers.rst.states.Inliner`. + + This is parsed to role functions. + """ + + def __init__(self, renderer, lineno: int): + self._renderer = renderer + self.document = renderer.document + self.reporter = renderer.document.reporter + if not hasattr(self.reporter, "get_source_and_line"): + # TODO this is called by some roles, + # but I can't see how that would work in RST? + self.reporter.get_source_and_line = lambda l: (self.document["source"], l) + self.parent = renderer.current_node + self.language = renderer.language_module + self.rfc_url = "rfc%d.html" + + def problematic(self, text: str, rawsource: str, message: nodes.system_message): + msgid = self.document.set_id(message, self.parent) + problematic = nodes.problematic(rawsource, rawsource, refid=msgid) + prbid = self.document.set_id(problematic) + message.add_backref(prbid) + return problematic + + # TODO add parse method + + def __getattr__(self, name): + """This method is only be called if the attribute requested has not + been defined. Defined attributes will not be overridden. + """ + # TODO use document.reporter mechanism? + if hasattr(Inliner, name): + msg = "{cls} has not yet implemented attribute '{name}'".format( + cls=type(self).__name__, name=name + ) + raise MockingError(msg).with_traceback(sys.exc_info()[2]) + msg = "{cls} has no attribute {name}".format(cls=type(self).__name__, name=name) + raise MockingError(msg).with_traceback(sys.exc_info()[2]) + + +class MockState: + """A mock version of `docutils.parsers.rst.states.RSTState`. + + This is parsed to the `Directives.run()` method, + so that they may run nested parses on their content that will be parsed as markdown, + rather than RST. + """ + + def __init__(self, renderer, state_machine: "MockStateMachine", lineno: int, token): + self._renderer = renderer + self._lineno = lineno + self._token = token + self.document = renderer.document + self.state_machine = state_machine + + class Struct: + document = self.document + reporter = self.document.reporter + language = self.document.settings.language_code + title_styles = [] + section_level = max(renderer._level_to_elem) + section_bubble_up_kludge = False + inliner = MockInliner(renderer, lineno) + + self.memo = Struct + + def nested_parse( + self, + block: StringList, + input_offset: int, + node: nodes.Element, + match_titles: bool = False, + state_machine_class=None, + state_machine_kwargs=None, + ): + current_match_titles = self.state_machine.match_titles + self.state_machine.match_titles = match_titles + with self._renderer.current_node_context(node): + self._renderer.nested_render_text( + "\n".join(block), self._lineno + input_offset + ) + self.state_machine.match_titles = current_match_titles + + def inline_text(self, text: str, lineno: int): + # TODO return messages? + messages = [] + paragraph = nodes.paragraph("") + + tokens = self._renderer.md.parseInline(text, self._renderer.env) + for token in tokens: + if token.map: + token.map = [token.map[0] + lineno, token.map[1] + lineno] + # TODO propagate line numbers to children (make separate function) + + # here we instantiate a new renderer, + # so that the nested parse does not effect the current renderer, + # but we use the same env, so that link references, etc + # are added to the global parse. + from .renderer import DocRenderer + + nested_renderer = DocRenderer( + self._renderer.md, document=self.document, current_node=paragraph + ) + nested_renderer.run_render(tokens, self._renderer.env) + return paragraph.children, messages + + # U+2014 is an em-dash: + attribution_pattern = re.compile("^((?:---?(?!-)|\u2014) *)(.+)") + + def block_quote(self, lines: List[str], line_offset: int): + """Parse a block quote, which is a block of text, + followed by an (optional) attribution. + + :: + + No matter where you go, there you are. + + -- Buckaroo Banzai + """ + elements = [] + # split attribution + last_line_blank = False + blockquote_lines = lines + attribution_lines = [] + attribution_line_offset = None + # First line after a blank line must begin with a dash + for i, line in enumerate(lines): + if not line.strip(): + last_line_blank = True + continue + if not last_line_blank: + last_line_blank = False + continue + last_line_blank = False + match = self.attribution_pattern.match(line) + if not match: + continue + attribution_line_offset = i + attribution_lines = [match.group(2)] + for at_line in lines[i + 1 :]: + indented_line = at_line[len(match.group(1)) :] + if len(indented_line) != len(at_line.lstrip()): + break + attribution_lines.append(indented_line) + blockquote_lines = lines[:i] + break + # parse block + blockquote = nodes.block_quote() + self.nested_parse(blockquote_lines, line_offset, blockquote) + elements.append(blockquote) + # parse attribution + if attribution_lines: + attribution_text = "\n".join(attribution_lines) + lineno = self._lineno + line_offset + attribution_line_offset + textnodes, messages = self.inline_text(attribution_text, lineno) + attribution = nodes.attribution(attribution_text, "", *textnodes) + ( + attribution.source, + attribution.line, + ) = self.state_machine.get_source_and_line(lineno) + blockquote += attribution + elements += messages + return elements + + def build_table(self, tabledata, tableline, stub_columns=0, widths=None): + return Body.build_table(self, tabledata, tableline, stub_columns, widths) + + def build_table_row(self, rowdata, tableline): + return Body.build_table_row(self, rowdata, tableline) + + def __getattr__(self, name): + """This method is only be called if the attribute requested has not + been defined. Defined attributes will not be overridden. + """ + if hasattr(Body, name): + msg = "{cls} has not yet implemented attribute '{name}'".format( + cls=type(self).__name__, name=name + ) + raise MockingError(msg).with_traceback(sys.exc_info()[2]) + msg = "{cls} has no attribute {name}".format(cls=type(self).__name__, name=name) + raise MockingError(msg).with_traceback(sys.exc_info()[2]) + + +class MockStateMachine: + """A mock version of `docutils.parsers.rst.states.RSTStateMachine`. + + This is parsed to the `Directives.run()` method. + """ + + def __init__(self, renderer, lineno: int): + self._renderer = renderer + self._lineno = lineno + self.document = renderer.document + self.reporter = self.document.reporter + self.node = renderer.current_node + self.match_titles = True + + # TODO to allow to access like attributes like input_lines, + # we would need to store the input lines, + # probably via the `Document` token, + # and maybe self._lines = lines[:], then for AstRenderer, + # ignore private attributes + + def get_source(self, lineno: Optional[int] = None): + """Return document source path.""" + return self.document["source"] + + def get_source_and_line(self, lineno: Optional[int] = None): + """Return (source path, line) tuple for current or given line number.""" + return self.document["source"], lineno or self._lineno + + def __getattr__(self, name): + """This method is only be called if the attribute requested has not + been defined. Defined attributes will not be overridden. + """ + if hasattr(RSTStateMachine, name): + msg = "{cls} has not yet implemented attribute '{name}'".format( + cls=type(self).__name__, name=name + ) + raise MockingError(msg).with_traceback(sys.exc_info()[2]) + msg = "{cls} has no attribute {name}".format(cls=type(self).__name__, name=name) + raise MockingError(msg).with_traceback(sys.exc_info()[2]) diff --git a/markdown_it/myst/parse_directives.py b/markdown_it/myst/parse_directives.py new file mode 100644 index 00000000..5d0366b9 --- /dev/null +++ b/markdown_it/myst/parse_directives.py @@ -0,0 +1,170 @@ +"""Fenced code blocks are parsed as directives, +if the block starts with ``{directive_name}``, +followed by arguments on the same line. + +Directive options are read from a YAML block, +if the first content line starts with ``---``, e.g. + +:: + + ```{directive_name} arguments + --- + option1: name + option2: | + Longer text block + --- + content... + ``` + +Or the option block will be parsed if the first content line starts with ``:``, +as a YAML block consisting of every line that starts with a ``:``, e.g. + +:: + + ```{directive_name} arguments + :option1: name + :option2: other + + content... + ``` + +If the first line of a directive's content is blank, this will be stripped +from the content. +This is to allow for separation between the option block and content. + +""" +import re +from textwrap import dedent +from typing import Callable, Dict, Type + +import yaml + +from docutils.parsers.rst import Directive +from docutils.parsers.rst.directives.misc import TestDirective + + +class DirectiveParsingError(Exception): + """Raise on parsing/validation error.""" + + pass + + +def parse_directive_text( + directive_class: Type[Directive], + argument_str: str, + content: str, + validate_options: bool = True, +): + """Parse (and validate) the full directive text.""" + if directive_class.option_spec: + body, options = parse_directive_options( + content, directive_class, validate=validate_options + ) + else: + # If there are no possible options, we do not look for a YAML block + options = {} + body = content + + body_lines = body.splitlines() + + if not ( + directive_class.required_arguments + or directive_class.optional_arguments + or options + ): + # If there are no possible arguments and no option block, + # then the body starts on the argument line + if argument_str: + body_lines.insert(0, argument_str) + arguments = [] + else: + arguments = parse_directive_arguments(directive_class, argument_str) + + # remove first line of body if blank + # this is to allow space between the options and the content + if body_lines and not body_lines[0].strip(): + body_lines = body_lines[1:] + + # check for body content + if body_lines and not directive_class.has_content: + raise DirectiveParsingError("No content permitted") + + return arguments, options, body_lines + + +def parse_directive_options( + content: str, directive_class: Type[Directive], validate: bool = True +): + """Parse (and validate) the directive option section.""" + options = {} + if content.startswith("---"): + content = "\n".join(content.splitlines()[1:]) + match = re.search(r"^-{3,}", content, re.MULTILINE) + if match: + yaml_block = content[: match.start()] + content = content[match.end() + 1 :] # TODO advance line number + else: + yaml_block = content + content = "" + yaml_block = dedent(yaml_block) + try: + options = yaml.safe_load(yaml_block) or {} + except (yaml.parser.ParserError, yaml.scanner.ScannerError) as error: + raise DirectiveParsingError("Invalid options YAML: " + str(error)) + elif content.lstrip().startswith(":"): + content_lines = content.splitlines() # type: list + yaml_lines = [] + while content_lines: + if not content_lines[0].lstrip().startswith(":"): + break + yaml_lines.append(content_lines.pop(0).lstrip()[1:]) + yaml_block = "\n".join(yaml_lines) + content = "\n".join(content_lines) + try: + options = yaml.safe_load(yaml_block) or {} + except (yaml.parser.ParserError, yaml.scanner.ScannerError) as error: + raise DirectiveParsingError("Invalid options YAML: " + str(error)) + + if (not validate) or issubclass(directive_class, TestDirective): + # technically this directive spec only accepts one option ('option') + # but since its for testing only we accept all options + return content, options + + # check options against spec + options_spec = directive_class.option_spec # type: Dict[str, Callable] + for name, value in list(options.items()): + convertor = options_spec.get(name, None) + if convertor is None: + raise DirectiveParsingError("Unknown option: {}".format(name)) + try: + converted_value = convertor(value) + except (ValueError, TypeError) as error: + raise DirectiveParsingError( + "Invalid option value: (option: '{}'; value: {})\n{}".format( + name, value, error + ) + ) + options[name] = converted_value + + return content, options + + +def parse_directive_arguments(directive, arg_text): + """Parse (and validate) the directive argument section.""" + required = directive.required_arguments + optional = directive.optional_arguments + arguments = arg_text.split() + if len(arguments) < required: + raise DirectiveParsingError( + "{} argument(s) required, {} supplied".format(required, len(arguments)) + ) + elif len(arguments) > required + optional: + if directive.final_argument_whitespace: + arguments = arg_text.split(None, required + optional - 1) + else: + raise DirectiveParsingError( + "maximum {} argument(s) allowed, {} supplied".format( + required + optional, len(arguments) + ) + ) + return arguments diff --git a/markdown_it/_doc_renderer.py b/markdown_it/myst/renderer.py similarity index 71% rename from markdown_it/_doc_renderer.py rename to markdown_it/myst/renderer.py index b3bb1206..7399e35e 100644 --- a/markdown_it/_doc_renderer.py +++ b/markdown_it/myst/renderer.py @@ -1,8 +1,7 @@ """NOTE: this will eventually be moved out of core""" from contextlib import contextmanager import json -import sys -from typing import List +from typing import List, Optional import yaml @@ -10,19 +9,19 @@ from docutils.frontend import OptionParser from docutils.languages import get_language -from docutils.parsers.rst import roles # directives, Directive, DirectiveError, roles +from docutils.parsers.rst import directives, Directive, DirectiveError, roles from docutils.parsers.rst import Parser as RSTParser - -# from docutils.parsers.rst.directives.misc import Include -from docutils.parsers.rst.states import Inliner # RSTStateMachine, Body - -# from docutils.statemachine import StringList +from docutils.statemachine import StringList from docutils.utils import new_document, Reporter +from markdown_it import MarkdownIt from markdown_it.token import Token, nest_tokens from markdown_it.utils import AttrDict from markdown_it.common.utils import escapeHtml +from .mocking import MockInliner, MockState, MockStateMachine, MockingError +from .parse_directives import parse_directive_text, DirectiveParsingError + def make_document(source_path="notset") -> nodes.document: """Create a new docutils document.""" @@ -33,17 +32,23 @@ def make_document(source_path="notset") -> nodes.document: class DocRenderer: __output__ = "docutils" - def __init__(self, options=None, env=None): + def __init__( + self, + md: MarkdownIt, + options=None, + document: Optional[nodes.document] = None, + current_node: Optional[nodes.Element] = None, + ): + self.md = md self.options = options or {} - self.env = env or AttrDict() self.rules = { k: v for k, v in self.__class__.__dict__.items() if k.startswith("render_") and k != "render_children" } - self.document = make_document() + self.document = document or make_document() self.reporter = self.document.reporter # type: Reporter - self.current_node = self.document + self.current_node = current_node or self.document self.language_module = self.document.settings.language_code # type: str get_language(self.language_module) # TODO merge these with self.env? @@ -58,8 +63,9 @@ def run_render(self, tokens: List[Token], env: AttrDict): containing additional metadata like reference info """ self.env = env - last_map = None + # propagate line number down to inline elements + last_map = None for token in tokens: if token.map: last_map = token.map @@ -67,7 +73,21 @@ def run_render(self, tokens: List[Token], env: AttrDict): token.meta["parent_line"] = last_map[0] for child in token.children or []: child.meta["parent_line"] = last_map[0] + + # nest tokens tokens = nest_tokens(tokens) + + # move footnote definitions to env + self.env["foot_refs"] = [] + new_tokens = [] + for token in tokens: + if token.type == "footnote_reference_open": + self.env["foot_refs"].append(token) + else: + new_tokens.append(token) + tokens = new_tokens + + # render for i, token in enumerate(tokens): # skip hidden? if f"render_{token.type}" in self.rules: @@ -75,6 +95,22 @@ def run_render(self, tokens: List[Token], env: AttrDict): else: print(f"no render method for: {token.type}") + # TODO log warning for duplicate references + + # add footnotes + referenced = { + v["label"] for v in self.env.get("footnotes", {}).get("list", {}).values() + } + # only output referenced + foot_refs = [f for f in self.env["foot_refs"] if f.meta["label"] in referenced] + + if foot_refs: + self.current_node.append(nodes.transition()) + for footref in foot_refs: # TODO sort by referenced + self.render_footnote_reference_open(footref) + + return self.document + @contextmanager def current_node_context(self, node, append: bool = False): """Context manager for temporarily setting the current node.""" @@ -92,6 +128,17 @@ def render_children(self, token): else: print(f"no render method for: {child.type}") + def nested_render_text(self, text: str, lineno: int): + """Render unparsed text.""" + with self.md.reset_rules(): + self.md.disable("front_matter", True) + tokens = self.md.parse(text, self.env) + for token in tokens: + if token.map: + token.map = [token.map[0] + lineno, token.map[1] + lineno] + # TODO propagate line numbers to children (make separate function) + self.run_render(tokens, self.env) + def add_line_and_source_path(self, node, token): """Copy the line number and document source path to the docutils node.""" try: @@ -203,7 +250,11 @@ def render_code_inline(self, token): def render_fence(self, token): text = token.content - language = token.info.split()[0] + language = token.info.split()[0] if token.info else "" + + if language.startswith("{") and language.endswith("}"): + return self.render_directive(token) + if not language: try: sphinx_env = self.document.settings.env @@ -301,6 +352,25 @@ def render_front_matter(self, token): docinfo = dict_to_docinfo(data) self.current_node.append(docinfo) + # def render_table_open(self, token): + # # print(token) + # # raise + + # table = nodes.table() + # table["classes"] += ["colwidths-auto"] + # self.add_line_and_source_path(table, token) + + # thead = nodes.thead() + # # TODO there can never be more than one header row (at least in mardown-it) + # header = token.children[0].children[0] + # for hrow in header.children: + # nodes.t + # style = hrow.attrGet("style") + + # tgroup = nodes.tgroup(cols) + # table += tgroup + # tgroup += thead + def render_math_inline(self, token): content = token.content node = nodes.math(content, content) @@ -378,29 +448,104 @@ def render_myst_role(self, token): problematic = inliner.problematic(text, rawsource, message) self.current_node += problematic - # # TODO representing as literal for place-holder - # node = nodes.literal(rawsource, rawsource) - # self.add_line_and_source_path(node, token) - # self.current_node.append(node) - - # def render_table_open(self, token): - # # print(token) - # # raise + def render_directive(self, token: Token): + """Render special fenced code blocks as directives.""" + first_line = token.info.split(maxsplit=1) + name = first_line[0][1:-1] + arguments = "" if len(first_line) == 1 else first_line[1] + # TODO directive name white/black lists + content = token.content + position = token.map[0] + self.document.current_line = position + + # get directive class + directive_class, messages = directives.directive( + name, self.language_module, self.document + ) # type: (Directive, list) + if not directive_class: + error = self.reporter.error( + "Unknown directive type '{}'\n".format(name), + # nodes.literal_block(content, content), + line=position, + ) + self.current_node += [error] + messages + return - # table = nodes.table() - # table["classes"] += ["colwidths-auto"] - # self.add_line_and_source_path(table, token) + try: + arguments, options, body_lines = parse_directive_text( + directive_class, arguments, content + ) + except DirectiveParsingError as error: + error = self.reporter.error( + "Directive '{}':\n{}".format(name, error), + nodes.literal_block(content, content), + line=position, + ) + self.current_node += [error] + return - # thead = nodes.thead() - # # TODO there can never be more than one header row (at least in mardown-it) - # header = token.children[0].children[0] - # for hrow in header.children: - # nodes.t - # style = hrow.attrGet("style") + # initialise directive + # TODO Include + # if issubclass(directive_class, Include): + # directive_instance = MockIncludeDirective( + # self, + # name=name, + # klass=directive_class, + # arguments=arguments, + # options=options, + # body=body_lines, + # token=token, + # ) + else: + state_machine = MockStateMachine(self, position) + state = MockState(self, state_machine, position, token=token) + directive_instance = directive_class( + name=name, + # the list of positional arguments + arguments=arguments, + # a dictionary mapping option names to values + options=options, + # the directive content line by line + content=StringList(body_lines, self.document["source"]), + # the absolute line number of the first line of the directive + lineno=position, + # the line offset of the first line of the content + content_offset=0, # TODO get content offset from `parse_directive_text` + # a string containing the entire directive + block_text="\n".join(body_lines), + state=state, + state_machine=state_machine, + ) - # tgroup = nodes.tgroup(cols) - # table += tgroup - # tgroup += thead + # run directive + try: + result = directive_instance.run() + except DirectiveError as error: + msg_node = self.reporter.system_message( + error.level, error.msg, line=position + ) + msg_node += nodes.literal_block(content, content) + result = [msg_node] + except MockingError as exc: + error = self.reporter.error( + "Directive '{}' cannot be mocked:\n{}: {}".format( + name, exc.__class__.__name__, exc + ), + nodes.literal_block(content, content), + line=position, + ) + self.current_node += [error] + return + assert isinstance( + result, list + ), 'Directive "{}" must return a list of nodes.'.format(name) + for i in range(len(result)): + assert isinstance( + result[i], nodes.Node + ), 'Directive "{}" returned non-Node object (index {}): {}'.format( + name, i, result[i] + ) + self.current_node += result def dict_to_docinfo(data): @@ -418,48 +563,3 @@ def dict_to_docinfo(data): field_node += nodes.field_body(value, nodes.Text(value, value)) docinfo += field_node return docinfo - - -class MockingError(Exception): - """An exception to signal an error during mocking of docutils components.""" - - -class MockInliner: - """A mock version of `docutils.parsers.rst.states.Inliner`. - - This is parsed to role functions. - """ - - def __init__(self, renderer: DocRenderer, lineno: int): - self._renderer = renderer - self.document = renderer.document - self.reporter = renderer.document.reporter - if not hasattr(self.reporter, "get_source_and_line"): - # TODO this is called by some roles, - # but I can't see how that would work in RST? - self.reporter.get_source_and_line = lambda l: (self.document["source"], l) - self.parent = renderer.current_node - self.language = renderer.language_module - self.rfc_url = "rfc%d.html" - - def problematic(self, text: str, rawsource: str, message: nodes.system_message): - msgid = self.document.set_id(message, self.parent) - problematic = nodes.problematic(rawsource, rawsource, refid=msgid) - prbid = self.document.set_id(problematic) - message.add_backref(prbid) - return problematic - - # TODO add parse method - - def __getattr__(self, name): - """This method is only be called if the attribute requested has not - been defined. Defined attributes will not be overridden. - """ - # TODO use document.reporter mechanism? - if hasattr(Inliner, name): - msg = "{cls} has not yet implemented attribute '{name}'".format( - cls=type(self).__name__, name=name - ) - raise MockingError(msg).with_traceback(sys.exc_info()[2]) - msg = "{cls} has no attribute {name}".format(cls=type(self).__name__, name=name) - raise MockingError(msg).with_traceback(sys.exc_info()[2]) diff --git a/markdown_it/rules_core/block.py b/markdown_it/rules_core/block.py index 8b991fd4..0fb490b0 100644 --- a/markdown_it/rules_core/block.py +++ b/markdown_it/rules_core/block.py @@ -9,6 +9,6 @@ def block(state: StateCore): token.content = state.src token.map = [0, 1] token.children = [] - state.tokens.push(token) + state.tokens.append(token) else: state.md.block.parse(state.src, state.md, state.env, state.tokens) diff --git a/try_running_renderer.py b/try_running_renderer.py index 58a14d30..4bc38eea 100644 --- a/try_running_renderer.py +++ b/try_running_renderer.py @@ -5,7 +5,7 @@ from markdown_it.extensions.myst_role import myst_role_plugin from markdown_it.extensions.texmath import texmath_plugin from markdown_it.extensions.footnote import footnote_plugin -from markdown_it._doc_renderer import DocRenderer +from markdown_it.myst.renderer import DocRenderer md = ( MarkdownIt() @@ -80,10 +80,16 @@ [^foot] 1. 345 + +````{note} +my title +```{contents} abc +``` +```` """, env=env, ) - -doc = DocRenderer() +print(env) +doc = DocRenderer(md) doc.run_render(tokens, env) print(doc.document.pformat()) From aae614db5e394072ee1359cb879f4a233062da1b Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Thu, 26 Mar 2020 12:56:53 +0000 Subject: [PATCH 3/4] add nesting --- markdown_it/myst/mocking.py | 181 ++++++++++++++++++++++++++++++++--- markdown_it/myst/renderer.py | 95 +++++++++++------- try_running_renderer.py | 5 +- 3 files changed, 235 insertions(+), 46 deletions(-) diff --git a/markdown_it/myst/mocking.py b/markdown_it/myst/mocking.py index a45c01c8..ef6d8d39 100644 --- a/markdown_it/myst/mocking.py +++ b/markdown_it/myst/mocking.py @@ -1,13 +1,14 @@ +from pathlib import Path import re import sys from typing import List, Optional from docutils import nodes from docutils.parsers.rst.states import Inliner, RSTStateMachine, Body +from docutils.parsers.rst import DirectiveError +from docutils.parsers.rst.directives.misc import Include from docutils.statemachine import StringList -# from docutils.parsers.rst.directives.misc import Include - class MockingError(Exception): """An exception to signal an error during mocking of docutils components.""" @@ -62,10 +63,9 @@ class MockState: rather than RST. """ - def __init__(self, renderer, state_machine: "MockStateMachine", lineno: int, token): + def __init__(self, renderer, state_machine: "MockStateMachine", lineno: int): self._renderer = renderer self._lineno = lineno - self._token = token self.document = renderer.document self.state_machine = state_machine @@ -106,7 +106,6 @@ def inline_text(self, text: str, lineno: int): for token in tokens: if token.map: token.map = [token.map[0] + lineno, token.map[1] + lineno] - # TODO propagate line numbers to children (make separate function) # here we instantiate a new renderer, # so that the nested parse does not effect the current renderer, @@ -117,7 +116,7 @@ def inline_text(self, text: str, lineno: int): nested_renderer = DocRenderer( self._renderer.md, document=self.document, current_node=paragraph ) - nested_renderer.run_render(tokens, self._renderer.env) + nested_renderer.run_render(tokens, self._renderer.env, ouput_footnotes=False) return paragraph.children, messages # U+2014 is an em-dash: @@ -211,12 +210,6 @@ def __init__(self, renderer, lineno: int): self.node = renderer.current_node self.match_titles = True - # TODO to allow to access like attributes like input_lines, - # we would need to store the input lines, - # probably via the `Document` token, - # and maybe self._lines = lines[:], then for AstRenderer, - # ignore private attributes - def get_source(self, lineno: Optional[int] = None): """Return document source path.""" return self.document["source"] @@ -236,3 +229,167 @@ def __getattr__(self, name): raise MockingError(msg).with_traceback(sys.exc_info()[2]) msg = "{cls} has no attribute {name}".format(cls=type(self).__name__, name=name) raise MockingError(msg).with_traceback(sys.exc_info()[2]) + + +class MockIncludeDirective: + """This directive uses a lot of statemachine logic that is not yet mocked. + Therefore, we treat it as a special case (at least for now). + + See: + https://docutils.sourceforge.io/docs/ref/rst/directives.html#including-an-external-document-fragment + """ + + def __init__( + self, + renderer, + name: str, + klass: Include, + arguments: list, + options: dict, + body: List[str], + token, + ): + self.renderer = renderer + self.document = renderer.document + self.name = name + self.klass = klass + self.arguments = arguments + self.options = options + self.body = body + self.lineno = token.position.line_start + self.token = token + + def run(self): + + from docutils.parsers.rst.directives.body import CodeBlock, NumberLines + + if not self.document.settings.file_insertion_enabled: + raise DirectiveError(2, 'Directive "{}" disabled.'.format(self.name)) + + source_dir = Path(self.document["source"]).absolute().parent + include_arg = "".join([s.strip() for s in self.arguments[0].splitlines()]) + + if include_arg.startswith("<") and include_arg.endswith(">"): + # # docutils "standard" includes + path = Path(self.klass.standard_include_path).joinpath(include_arg[1:-1]) + else: + # if using sphinx interpret absolute paths "correctly", + # i.e. relative to source directory + try: + sphinx_env = self.document.settings.env + _, include_arg = sphinx_env.relfn2path(self.arguments[0]) + sphinx_env.note_included(include_arg) + except AttributeError: + pass + path = Path(include_arg) + path = source_dir.joinpath(path) + + # read file + encoding = self.options.get("encoding", self.document.settings.input_encoding) + error_handler = self.document.settings.input_encoding_error_handler + # tab_width = self.options.get("tab-width", self.document.settings.tab_width) + try: + file_content = path.read_text(encoding=encoding, errors=error_handler) + except Exception as error: + raise DirectiveError( + 4, + 'Directive "{}": error reading file: {}\n{error}.'.format( + self.name, path, error + ), + ) + + # get required section of text + startline = self.options.get("start-line", None) + endline = self.options.get("end-line", None) + file_content = "\n".join(file_content.splitlines()[startline:endline]) + startline = startline or 0 + for split_on_type in ["start-after", "end-before"]: + split_on = self.options.get(split_on_type, None) + if not split_on: + continue + split_index = file_content.find(split_on) + if split_index < 0: + raise DirectiveError( + 4, + 'Directive "{}"; option "{}": text not found "{}".'.format( + self.name, split_on_type, split_on + ), + ) + if split_on_type == "start-after": + startline += split_index + len(split_on) + file_content = file_content[split_index + len(split_on) :] + else: + file_content = file_content[:split_index] + + if "literal" in self.options: + literal_block = nodes.literal_block( + file_content, source=str(path), classes=self.options.get("class", []) + ) + literal_block.line = 1 # TODO don;t think this should be 1? + self.add_name(literal_block) + if "number-lines" in self.options: + try: + startline = int(self.options["number-lines"] or 1) + except ValueError: + raise DirectiveError( + 3, ":number-lines: with non-integer " "start value" + ) + endline = startline + len(file_content.splitlines()) + if file_content.endswith("\n"): + file_content = file_content[:-1] + tokens = NumberLines([([], file_content)], startline, endline) + for classes, value in tokens: + if classes: + literal_block += nodes.inline(value, value, classes=classes) + else: + literal_block += nodes.Text(value) + else: + literal_block += nodes.Text(file_content) + return [literal_block] + if "code" in self.options: + self.options["source"] = str(path) + state_machine = MockStateMachine(self.renderer, self.lineno) + state = MockState(self.renderer, state_machine, self.lineno) + codeblock = CodeBlock( + name=self.name, + arguments=[self.options.pop("code")], + options=self.options, + content=file_content.splitlines(), + lineno=self.lineno, + content_offset=0, + block_text=file_content, + state=state, + state_machine=state_machine, + ) + return codeblock.run() + + # Here we perform a nested render, but temporarily setup the document/reporter + # with the correct document path and lineno for the included file. + source = self.renderer.document["source"] + rsource = self.renderer.reporter.source + line_func = getattr(self.renderer.reporter, "get_source_and_line", None) + try: + self.renderer.document["source"] = str(path) + self.renderer.reporter.source = str(path) + self.renderer.reporter.get_source_and_line = lambda l: (str(path), l) + self.renderer.nested_render_text(file_content, startline) + finally: + self.renderer.document["source"] = source + self.renderer.reporter.source = rsource + if line_func is not None: + self.renderer.reporter.get_source_and_line = line_func + else: + del self.renderer.reporter.get_source_and_line + return [] + + def add_name(self, node): + """Append self.options['name'] to node['names'] if it exists. + + Also normalize the name string and register it as explicit target. + """ + if "name" in self.options: + name = nodes.fully_normalize_name(self.options.pop("name")) + if "name" in node: + del node["name"] + node["names"].append(name) + self.renderer.document.note_explicit_target(node, node) diff --git a/markdown_it/myst/renderer.py b/markdown_it/myst/renderer.py index 7399e35e..0da8548d 100644 --- a/markdown_it/myst/renderer.py +++ b/markdown_it/myst/renderer.py @@ -11,6 +11,7 @@ from docutils.languages import get_language from docutils.parsers.rst import directives, Directive, DirectiveError, roles from docutils.parsers.rst import Parser as RSTParser +from docutils.parsers.rst.directives.misc import Include from docutils.statemachine import StringList from docutils.utils import new_document, Reporter @@ -19,7 +20,13 @@ from markdown_it.utils import AttrDict from markdown_it.common.utils import escapeHtml -from .mocking import MockInliner, MockState, MockStateMachine, MockingError +from .mocking import ( + MockInliner, + MockState, + MockStateMachine, + MockingError, + MockIncludeDirective, +) from .parse_directives import parse_directive_text, DirectiveParsingError @@ -55,7 +62,7 @@ def __init__( self.config = {} self._level_to_elem = {0: self.document} - def run_render(self, tokens: List[Token], env: AttrDict): + def run_render(self, tokens: List[Token], env: AttrDict, ouput_footnotes=True): """Run the render on a token stream. :param tokens: the token stream @@ -65,20 +72,15 @@ def run_render(self, tokens: List[Token], env: AttrDict): self.env = env # propagate line number down to inline elements - last_map = None for token in tokens: - if token.map: - last_map = token.map - elif last_map: - token.meta["parent_line"] = last_map[0] for child in token.children or []: - child.meta["parent_line"] = last_map[0] + child.map = token.map # nest tokens tokens = nest_tokens(tokens) # move footnote definitions to env - self.env["foot_refs"] = [] + self.env.setdefault("foot_refs", []) new_tokens = [] for token in tokens: if token.type == "footnote_reference_open": @@ -97,6 +99,9 @@ def run_render(self, tokens: List[Token], env: AttrDict): # TODO log warning for duplicate references + if not ouput_footnotes: + return self.document + # add footnotes referenced = { v["label"] for v in self.env.get("footnotes", {}).get("list", {}).values() @@ -111,6 +116,42 @@ def run_render(self, tokens: List[Token], env: AttrDict): return self.document + def nested_render_text(self, text: str, lineno: int): + """Render unparsed text.""" + + # parse without front matter + with self.md.reset_rules(): + self.md.disable("front_matter", True) + tokens = self.md.parse(text, self.env) + + # set correct line numbers + for token in tokens: + if token.map: + token.map = [token.map[0] + lineno, token.map[1] + lineno] + for child in token.children or []: + child.map = token.map + + # nest tokens + tokens = nest_tokens(tokens) + + # move footnote definitions to env + self.env.setdefault("foot_refs", []) + new_tokens = [] + for token in tokens: + if token.type == "footnote_reference_open": + self.env["foot_refs"].append(token) + else: + new_tokens.append(token) + tokens = new_tokens + + # render + for i, token in enumerate(tokens): + # skip hidden? + if f"render_{token.type}" in self.rules: + self.rules[f"render_{token.type}"](self, token) + else: + print(f"no render method for: {token.type}") + @contextmanager def current_node_context(self, node, append: bool = False): """Context manager for temporarily setting the current node.""" @@ -128,17 +169,6 @@ def render_children(self, token): else: print(f"no render method for: {child.type}") - def nested_render_text(self, text: str, lineno: int): - """Render unparsed text.""" - with self.md.reset_rules(): - self.md.disable("front_matter", True) - tokens = self.md.parse(text, self.env) - for token in tokens: - if token.map: - token.map = [token.map[0] + lineno, token.map[1] + lineno] - # TODO propagate line numbers to children (make separate function) - self.run_render(tokens, self.env) - def add_line_and_source_path(self, node, token): """Copy the line number and document source path to the docutils node.""" try: @@ -432,7 +462,7 @@ def render_myst_role(self, token): name = token.meta["name"] text = escapeHtml(token.content) # TODO check this rawsource = f":{name}:`{token.content}`" - lineno = token.meta.get("parent_line", 0) + lineno = token.map[0] if token.map else 0 role_func, messages = roles.role( name, self.language_module, lineno, self.reporter ) @@ -485,20 +515,19 @@ def render_directive(self, token: Token): return # initialise directive - # TODO Include - # if issubclass(directive_class, Include): - # directive_instance = MockIncludeDirective( - # self, - # name=name, - # klass=directive_class, - # arguments=arguments, - # options=options, - # body=body_lines, - # token=token, - # ) + if issubclass(directive_class, Include): + directive_instance = MockIncludeDirective( + self, + name=name, + klass=directive_class, + arguments=arguments, + options=options, + body=body_lines, + token=token, + ) else: state_machine = MockStateMachine(self, position) - state = MockState(self, state_machine, position, token=token) + state = MockState(self, state_machine, position) directive_instance = directive_class( name=name, # the list of positional arguments diff --git a/try_running_renderer.py b/try_running_renderer.py index 4bc38eea..6c0a99e1 100644 --- a/try_running_renderer.py +++ b/try_running_renderer.py @@ -85,11 +85,14 @@ my title ```{contents} abc ``` +[^dfg] +[^dfg]: xyc ```` """, env=env, ) -print(env) + doc = DocRenderer(md) doc.run_render(tokens, env) +print(len(env["foot_refs"])) print(doc.document.pformat()) From 26a60dd67890ecf59a512233ca6ff1f9bdaefb9b Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Thu, 26 Mar 2020 13:44:47 +0000 Subject: [PATCH 4/4] add sphinx renderer --- .../myst/{renderer.py => doc_renderer.py} | 68 +++++++- markdown_it/myst/mocking.py | 4 +- markdown_it/myst/sphinx_renderer.py | 161 ++++++++++++++++++ try_running_renderer.py | 8 +- 4 files changed, 227 insertions(+), 14 deletions(-) rename markdown_it/myst/{renderer.py => doc_renderer.py} (89%) create mode 100644 markdown_it/myst/sphinx_renderer.py diff --git a/markdown_it/myst/renderer.py b/markdown_it/myst/doc_renderer.py similarity index 89% rename from markdown_it/myst/renderer.py rename to markdown_it/myst/doc_renderer.py index 0da8548d..0201782d 100644 --- a/markdown_it/myst/renderer.py +++ b/markdown_it/myst/doc_renderer.py @@ -1,7 +1,10 @@ """NOTE: this will eventually be moved out of core""" from contextlib import contextmanager +import inspect import json +from os.path import splitext from typing import List, Optional +from urllib.parse import urlparse import yaml @@ -36,7 +39,7 @@ def make_document(source_path="notset") -> nodes.document: return new_document(source_path, settings=settings) -class DocRenderer: +class DocutilsRenderer: __output__ = "docutils" def __init__( @@ -50,7 +53,7 @@ def __init__( self.options = options or {} self.rules = { k: v - for k, v in self.__class__.__dict__.items() + for k, v in inspect.getmembers(self, predicate=inspect.ismethod) if k.startswith("render_") and k != "render_children" } self.document = document or make_document() @@ -93,7 +96,7 @@ def run_render(self, tokens: List[Token], env: AttrDict, ouput_footnotes=True): for i, token in enumerate(tokens): # skip hidden? if f"render_{token.type}" in self.rules: - self.rules[f"render_{token.type}"](self, token) + self.rules[f"render_{token.type}"](token) else: print(f"no render method for: {token.type}") @@ -148,7 +151,7 @@ def nested_render_text(self, text: str, lineno: int): for i, token in enumerate(tokens): # skip hidden? if f"render_{token.type}" in self.rules: - self.rules[f"render_{token.type}"](self, token) + self.rules[f"render_{token.type}"](token) else: print(f"no render method for: {token.type}") @@ -165,7 +168,7 @@ def current_node_context(self, node, append: bool = False): def render_children(self, token): for i, child in enumerate(token.children or []): if f"render_{child.type}" in self.rules: - self.rules[f"render_{child.type}"](self, child) + self.rules[f"render_{child.type}"](child) else: print(f"no render method for: {child.type}") @@ -330,10 +333,57 @@ def render_heading_open(self, token): self.current_node = section def render_link_open(self, token): - # TODO I think this is maybe already handled at this point? - # refuri = escape_url(http://github.com/token.target) - # TODO identify cross-references - refuri = target = token.attrGet("href") + if token.markup == "autolink": + return self.render_autolink(token) + + ref_node = nodes.reference() + self.add_line_and_source_path(ref_node, token) + # Check destination is supported for cross-linking and remove extension + # TODO escape urls? + destination = token.attrGet("href") + title = token.attrGet("title") + _, ext = splitext(destination) + # TODO check for other supported extensions, such as those specified in + # the Sphinx conf.py file but how to access this information? + # TODO this should probably only remove the extension for local paths, + # i.e. not uri's starting with http or other external prefix. + + # if ext.replace('.', '') in self.supported: + # destination = destination.replace(ext, '') + ref_node["refuri"] = destination + print(token) + if title: + ref_node["title"] = title + next_node = ref_node + + url_check = urlparse(destination) + # If there's not a url scheme (e.g. 'https' for 'https:...' links), + # or there is a scheme but it's not in the list of known_url_schemes, + # then assume it's a cross-reference + known_url_schemes = self.config.get("known_url_schemes", None) + if known_url_schemes: + scheme_known = url_check.scheme in known_url_schemes + else: + scheme_known = bool(url_check.scheme) + + if not url_check.fragment and not scheme_known: + self.handle_cross_reference(token, destination) + else: + self.current_node.append(next_node) + with self.current_node_context(ref_node): + self.render_children(token) + + def handle_cross_reference(self, token, destination): + # TODO use the docutils error reporting mechanisms, rather than raising + if not self.config.get("ignore_missing_refs", False): + raise NotImplementedError( + "reference not found in current document: {} (lines: {})".format( + destination, token.map + ) + ) + + def render_autolink(self, token): + refuri = target = escapeHtml(token.attrGet("href")) ref_node = nodes.reference(target, target, refuri=refuri) self.add_line_and_source_path(ref_node, token) self.current_node.append(ref_node) diff --git a/markdown_it/myst/mocking.py b/markdown_it/myst/mocking.py index ef6d8d39..af045ed7 100644 --- a/markdown_it/myst/mocking.py +++ b/markdown_it/myst/mocking.py @@ -111,9 +111,9 @@ def inline_text(self, text: str, lineno: int): # so that the nested parse does not effect the current renderer, # but we use the same env, so that link references, etc # are added to the global parse. - from .renderer import DocRenderer + from .doc_renderer import DocutilsRenderer - nested_renderer = DocRenderer( + nested_renderer = DocutilsRenderer( self._renderer.md, document=self.document, current_node=paragraph ) nested_renderer.run_render(tokens, self._renderer.env, ouput_footnotes=False) diff --git a/markdown_it/myst/sphinx_renderer.py b/markdown_it/myst/sphinx_renderer.py new file mode 100644 index 00000000..a16da70a --- /dev/null +++ b/markdown_it/myst/sphinx_renderer.py @@ -0,0 +1,161 @@ +import copy +from urllib.parse import unquote + +from docutils import nodes +from docutils.parsers.rst import directives, roles + +from .doc_renderer import DocutilsRenderer + + +class SphinxRenderer(DocutilsRenderer): + """A mistletoe renderer to populate (in-place) a `docutils.document` AST. + + This is sub-class of `DocutilsRenderer` that handles sphinx cross-referencing. + """ + + def __init__(self, *args, **kwargs): + """Initialise SphinxRenderer + + :param load_sphinx_env: load a basic sphinx environment, + when using the renderer as a context manager outside if `sphinx-build` + :param sphinx_conf: a dictionary representation of the sphinx `conf.py` + :param sphinx_srcdir: a path to a source directory + (for example, can be used for `include` statements) + + To use this renderer in a 'standalone' fashion:: + + from myst_parser.block_tokens import Document + + with SphinxRenderer(load_sphinx_env=True, sphinx_conf={}) as renderer: + renderer.render(Document.read("source text")) + + """ + self.load_sphinx_env = kwargs.pop("load_sphinx_env", False) + self.sphinx_conf = kwargs.pop("sphinx_conf", None) + self.sphinx_srcdir = kwargs.pop("sphinx_srcdir", None) + super().__init__(*args, **kwargs) + + def handle_cross_reference(self, token, destination): + from sphinx import addnodes + + wrap_node = addnodes.pending_xref( + reftarget=unquote(destination), + reftype="any", + refdomain=None, # Added to enable cross-linking + refexplicit=len(token.children) > 0, + refwarn=True, + ) + self.add_line_and_source_path(wrap_node, token) + title = token.attrGet("title") + if title: + wrap_node["title"] = title + self.current_node.append(wrap_node) + text_node = nodes.literal("", "", classes=["xref", "any"]) + wrap_node.append(text_node) + with self.current_node_context(text_node): + self.render_children(token) + + def mock_sphinx_env(self, configuration=None, sourcedir=None): + """Create a minimimal Sphinx environment; + loading sphinx roles, directives, etc. + """ + from sphinx.application import builtin_extensions, Sphinx + from sphinx.config import Config + from sphinx.environment import BuildEnvironment + from sphinx.events import EventManager + from sphinx.project import Project + from sphinx.registry import SphinxComponentRegistry + from sphinx.util.tags import Tags + + class MockSphinx(Sphinx): + """Minimal sphinx init to load roles and directives.""" + + def __init__(self, confoverrides=None, srcdir=None): + self.extensions = {} + self.registry = SphinxComponentRegistry() + self.html_themes = {} + self.events = EventManager(self) + self.tags = Tags(None) + self.config = Config({}, confoverrides or {}) + self.config.pre_init_values() + self._init_i18n() + for extension in builtin_extensions: + self.registry.load_extension(self, extension) + # fresh env + self.doctreedir = None + self.srcdir = srcdir + self.confdir = None + self.outdir = None + self.project = Project(srcdir=srcdir, source_suffix=".md") + self.project.docnames = ["mock_docname"] + self.env = BuildEnvironment() + self.env.setup(self) + self.env.temp_data["docname"] = "mock_docname" + self.builder = None + + if not confoverrides: + return + + # this code is only required for more complex parsing with extensions + for extension in self.config.extensions: + self.setup_extension(extension) + buildername = "dummy" + self.preload_builder(buildername) + self.config.init_values() + self.events.emit("config-inited", self.config) + import tempfile + + with tempfile.TemporaryDirectory() as tempdir: + # creating a builder attempts to make the doctreedir + self.doctreedir = tempdir + self.builder = self.create_builder(buildername) + self.doctreedir = None + + app = MockSphinx(confoverrides=configuration, srcdir=sourcedir) + self.document.settings.env = app.env + return app + + def __enter__(self): + """If `load_sphinx_env=True`, we set up an environment, + to parse sphinx roles/directives, outside of a `sphinx-build`. + + This primarily copies the code in `sphinx.util.docutils.docutils_namespace` + and `sphinx.util.docutils.sphinx_domains`. + """ + if not self.load_sphinx_env: + return super().__enter__() + + # store currently loaded roles/directives, so we can revert on exit + self._directives = copy.copy(directives._directives) + self._roles = copy.copy(roles._roles) + # Monkey-patch directive and role dispatch, + # so that sphinx domain-specific markup takes precedence. + self._env = self.mock_sphinx_env( + configuration=self.sphinx_conf, sourcedir=self.sphinx_srcdir + ).env + from sphinx.util.docutils import sphinx_domains + + self._sphinx_domains = sphinx_domains(self._env) + self._sphinx_domains.enable() + + return super().__enter__() + + def __exit__(self, exception_type, exception_val, traceback): + if not self.load_sphinx_env: + return super().__exit__(exception_type, exception_val, traceback) + # revert loaded roles/directives + directives._directives = self._directives + roles._roles = self._roles + self._directives = None + self._roles = None + # unregister nodes (see `sphinx.util.docutils.docutils_namespace`) + from sphinx.util.docutils import additional_nodes, unregister_node + + for node in list(additional_nodes): + unregister_node(node) + additional_nodes.discard(node) + # revert directive/role function (see `sphinx.util.docutils.sphinx_domains`) + self._sphinx_domains.disable() + self._sphinx_domains = None + self._env = None + return super().__exit__(exception_type, exception_val, traceback) diff --git a/try_running_renderer.py b/try_running_renderer.py index 6c0a99e1..8a92ed22 100644 --- a/try_running_renderer.py +++ b/try_running_renderer.py @@ -5,7 +5,7 @@ from markdown_it.extensions.myst_role import myst_role_plugin from markdown_it.extensions.texmath import texmath_plugin from markdown_it.extensions.footnote import footnote_plugin -from markdown_it.myst.renderer import DocRenderer +from markdown_it.myst.sphinx_renderer import SphinxRenderer md = ( MarkdownIt() @@ -52,7 +52,7 @@ [a][b] -[b]: s +[b]: s "a"
A
@@ -88,11 +88,13 @@ [^dfg] [^dfg]: xyc ```` +[dvs]: a +[dvs] """, env=env, ) -doc = DocRenderer(md) +doc = SphinxRenderer(md) doc.run_render(tokens, env) print(len(env["foot_refs"])) print(doc.document.pformat())