From 581eef1c742e75727f86468e0f67d44bbe5fbce0 Mon Sep 17 00:00:00 2001
From: Chris Sewell <chrisj_sewell@hotmail.com>
Date: Thu, 26 Mar 2020 09:34:52 +0000
Subject: [PATCH 1/4] update renderer

---
 markdown_it/_doc_renderer.py | 163 ++++++++++++++++++++++++++++++++---
 try_running_renderer.py      | 150 +++++++++++++++++++-------------
 2 files changed, 239 insertions(+), 74 deletions(-)

diff --git a/markdown_it/_doc_renderer.py b/markdown_it/_doc_renderer.py
index 1957b31c..b3bb1206 100644
--- a/markdown_it/_doc_renderer.py
+++ b/markdown_it/_doc_renderer.py
@@ -1,6 +1,7 @@
 """NOTE: this will eventually be moved out of core"""
 from contextlib import contextmanager
 import json
+import sys
 from typing import List
 
 import yaml
@@ -8,16 +9,19 @@
 from docutils import nodes
 from docutils.frontend import OptionParser
 
-# from docutils.languages import get_language
-# from docutils.parsers.rst import directives, Directive, DirectiveError, roles
+from docutils.languages import get_language
+from docutils.parsers.rst import roles  # directives, Directive, DirectiveError, roles
 from docutils.parsers.rst import Parser as RSTParser
 
 # from docutils.parsers.rst.directives.misc import Include
-# from docutils.parsers.rst.states import RSTStateMachine, Body, Inliner
+from docutils.parsers.rst.states import Inliner  # RSTStateMachine, Body
+
 # from docutils.statemachine import StringList
-from docutils.utils import new_document, Reporter  # noqa
+from docutils.utils import new_document, Reporter
 
 from markdown_it.token import Token, nest_tokens
+from markdown_it.utils import AttrDict
+from markdown_it.common.utils import escapeHtml
 
 
 def make_document(source_path="notset") -> nodes.document:
@@ -31,20 +35,41 @@ class DocRenderer:
 
     def __init__(self, options=None, env=None):
         self.options = options or {}
-        self.env = env or {}
+        self.env = env or AttrDict()
         self.rules = {
             k: v
             for k, v in self.__class__.__dict__.items()
             if k.startswith("render_") and k != "render_children"
         }
         self.document = make_document()
+        self.reporter = self.document.reporter  # type: Reporter
         self.current_node = self.document
+        self.language_module = self.document.settings.language_code  # type: str
+        get_language(self.language_module)
+        # TODO merge these with self.env?
         self.config = {}
         self._level_to_elem = {0: self.document}
 
-    def run_render(self, tokens: List[Token]):
+    def run_render(self, tokens: List[Token], env: AttrDict):
+        """Run the render on a token stream.
+
+        :param tokens: the token stream
+        :param env: the environment sandbox associated with the tokens,
+            containing additional metadata like reference info
+        """
+        self.env = env
+        last_map = None
+        # propagate line number down to inline elements
+        for token in tokens:
+            if token.map:
+                last_map = token.map
+            elif last_map:
+                token.meta["parent_line"] = last_map[0]
+            for child in token.children or []:
+                child.meta["parent_line"] = last_map[0]
         tokens = nest_tokens(tokens)
         for i, token in enumerate(tokens):
+            # skip hidden?
             if f"render_{token.type}" in self.rules:
                 self.rules[f"render_{token.type}"](self, token)
             else:
@@ -113,7 +138,7 @@ def renderInlineAsText(self, tokens: List[Token]) -> str:
 
         return result
 
-    # ### render methods for tokens
+    # ### render methods for commonmark tokens
 
     def render_paragraph_open(self, token):
         para = nodes.paragraph("")
@@ -133,6 +158,12 @@ def render_bullet_list_open(self, token):
         with self.current_node_context(list_node, append=True):
             self.render_children(token)
 
+    def render_ordered_list_open(self, token):
+        list_node = nodes.enumerated_list()
+        self.add_line_and_source_path(list_node, token)
+        with self.current_node_context(list_node, append=True):
+            self.render_children(token)
+
     def render_list_item_open(self, token):
         item_node = nodes.list_item()
         self.add_line_and_source_path(item_node, token)
@@ -220,6 +251,7 @@ def render_heading_open(self, token):
     def render_link_open(self, token):
         # TODO I think this is maybe already handled at this point?
         # refuri = escape_url(http://github.com/token.target)
+        # TODO identify cross-references
         refuri = target = token.attrGet("href")
         ref_node = nodes.reference(target, target, refuri=refuri)
         self.add_line_and_source_path(ref_node, token)
@@ -240,6 +272,8 @@ def render_image(self, token):
 
         self.current_node.append(img_node)
 
+    # ### render methods for plugin tokens
+
     def render_front_matter(self, token):
         """Pass document front matter data
 
@@ -267,6 +301,45 @@ def render_front_matter(self, token):
         docinfo = dict_to_docinfo(data)
         self.current_node.append(docinfo)
 
+    def render_math_inline(self, token):
+        content = token.content
+        node = nodes.math(content, content)
+        self.add_line_and_source_path(node, token)
+        self.current_node.append(node)
+
+    def render_math_block(self, token):
+        content = token.content
+        node = nodes.math_block(content, content, nowrap=False, number=None)
+        self.add_line_and_source_path(node, token)
+        self.current_node.append(node)
+
+    def render_footnote_ref(self, token):
+        """Footnote references are added as auto-numbered,
+        .i.e. `[^a]` is read as rST `[#a]_`
+        """
+        # TODO we now also have ^[a] the inline version (currently disabled)
+        # that would be rendered here
+        target = token.meta["label"]
+        refnode = nodes.footnote_reference("[^{}]".format(target))
+        self.add_line_and_source_path(refnode, token)
+        refnode["auto"] = 1
+        refnode["refname"] = target
+        # refnode += nodes.Text(token.target)
+        self.document.note_autofootnote_ref(refnode)
+        self.document.note_footnote_ref(refnode)
+        self.current_node.append(refnode)
+
+    def render_footnote_reference_open(self, token):
+        target = token.meta["label"]
+        footnote = nodes.footnote()
+        self.add_line_and_source_path(footnote, token)
+        footnote["names"].append(target)
+        footnote["auto"] = 1
+        self.document.note_autofootnote(footnote)
+        self.document.note_explicit_target(footnote, footnote)
+        with self.current_node_context(footnote, append=True):
+            self.render_children(token)
+
     def render_myst_block_break(self, token):
         block_break = nodes.comment(token.content, token.content)
         block_break["classes"] += ["block_break"]
@@ -282,14 +355,33 @@ def render_myst_target(self, token):
         self.document.note_explicit_target(target, self.current_node)
         self.current_node.append(target)
 
-    def render_myst_role(self, token):
+    def render_myst_line_comment(self, token):
+        self.current_node.append(nodes.comment(token.content, token.content))
 
+    def render_myst_role(self, token):
         name = token.meta["name"]
-        # TODO representing as literal for place-holder
-        content = f":{name}:`{token.content}`"
-        node = nodes.literal(content, content)
-        self.add_line_and_source_path(node, token)
-        self.current_node.append(node)
+        text = escapeHtml(token.content)  # TODO check this
+        rawsource = f":{name}:`{token.content}`"
+        lineno = token.meta.get("parent_line", 0)
+        role_func, messages = roles.role(
+            name, self.language_module, lineno, self.reporter
+        )
+        inliner = MockInliner(self, lineno)
+        if role_func:
+            nodes, messages2 = role_func(name, rawsource, text, lineno, inliner)
+            # return nodes, messages + messages2
+            self.current_node += nodes
+        else:
+            message = self.reporter.error(
+                'Unknown interpreted text role "{}".'.format(name), line=lineno
+            )
+            problematic = inliner.problematic(text, rawsource, message)
+            self.current_node += problematic
+
+        # # TODO representing as literal for place-holder
+        # node = nodes.literal(rawsource, rawsource)
+        # self.add_line_and_source_path(node, token)
+        # self.current_node.append(node)
 
     # def render_table_open(self, token):
     #     # print(token)
@@ -326,3 +418,48 @@ def dict_to_docinfo(data):
         field_node += nodes.field_body(value, nodes.Text(value, value))
         docinfo += field_node
     return docinfo
+
+
+class MockingError(Exception):
+    """An exception to signal an error during mocking of docutils components."""
+
+
+class MockInliner:
+    """A mock version of `docutils.parsers.rst.states.Inliner`.
+
+    This is parsed to role functions.
+    """
+
+    def __init__(self, renderer: DocRenderer, lineno: int):
+        self._renderer = renderer
+        self.document = renderer.document
+        self.reporter = renderer.document.reporter
+        if not hasattr(self.reporter, "get_source_and_line"):
+            # TODO this is called by some roles,
+            # but I can't see how that would work in RST?
+            self.reporter.get_source_and_line = lambda l: (self.document["source"], l)
+        self.parent = renderer.current_node
+        self.language = renderer.language_module
+        self.rfc_url = "rfc%d.html"
+
+    def problematic(self, text: str, rawsource: str, message: nodes.system_message):
+        msgid = self.document.set_id(message, self.parent)
+        problematic = nodes.problematic(rawsource, rawsource, refid=msgid)
+        prbid = self.document.set_id(problematic)
+        message.add_backref(prbid)
+        return problematic
+
+    # TODO add parse method
+
+    def __getattr__(self, name):
+        """This method is only be called if the attribute requested has not
+        been defined. Defined attributes will not be overridden.
+        """
+        # TODO use document.reporter mechanism?
+        if hasattr(Inliner, name):
+            msg = "{cls} has not yet implemented attribute '{name}'".format(
+                cls=type(self).__name__, name=name
+            )
+            raise MockingError(msg).with_traceback(sys.exc_info()[2])
+        msg = "{cls} has no attribute {name}".format(cls=type(self).__name__, name=name)
+        raise MockingError(msg).with_traceback(sys.exc_info()[2])
diff --git a/try_running_renderer.py b/try_running_renderer.py
index 3c08fb14..58a14d30 100644
--- a/try_running_renderer.py
+++ b/try_running_renderer.py
@@ -1,61 +1,89 @@
-if __name__ == "__main__":
-
-    from markdown_it import MarkdownIt
-    from markdown_it.extensions.front_matter import front_matter_plugin
-    from markdown_it.extensions.myst_blocks import myst_block_plugin
-    from markdown_it.extensions.myst_role import myst_role_plugin
-    from markdown_it.doc_renderer import DocRenderer
-
-    md = (
-        MarkdownIt()
-        .use(front_matter_plugin)
-        .use(myst_block_plugin)
-        .use(myst_role_plugin)
-    )
-    tokens = md.parse(
-        """\
-    ---
-    a: 1
-    b:
-      - c
-    ---
-    (xyz)=
-    # title
-    a
-    - b *c* **g**
-        - h
-    d
-    > +++
-    ---
-    ` a `
-    ```a dfg
-    mj
-    ```
-    ## a
-
-    abc
-    ===
-
-    <c@google.com>
-
-    [a][b]
-
-    [b]: s
-
-    <div>A</div>
-
-    a <span>a</span>
-
-    ![a *A*](b)
-
-    +++ axbc
-
-    {role-name:}`abc`
-    """
-    )
-
-    # print(get_nested(tokens))
-
-    doc = DocRenderer()
-    doc.run_render(tokens)
-    print(doc.document.pformat())
+from markdown_it import MarkdownIt
+from markdown_it.utils import AttrDict
+from markdown_it.extensions.front_matter import front_matter_plugin
+from markdown_it.extensions.myst_blocks import myst_block_plugin
+from markdown_it.extensions.myst_role import myst_role_plugin
+from markdown_it.extensions.texmath import texmath_plugin
+from markdown_it.extensions.footnote import footnote_plugin
+from markdown_it._doc_renderer import DocRenderer
+
+md = (
+    MarkdownIt()
+    .enable("table")
+    .use(front_matter_plugin)
+    .use(myst_block_plugin)
+    .use(myst_role_plugin)
+    .use(texmath_plugin)
+    .use(footnote_plugin)
+    .disable("footnote_inline")
+    # disable this for now, because it need a new implementation in the renderer
+    .disable("footnote_tail")
+    # we don't want to yet remove un-referenced, because they may be referenced
+    # in admonition type directives
+    # we need to do our own post process to gather them
+    # (and also add nodes.transition() above)
+)
+env = AttrDict()
+tokens = md.parse(
+    """\
+---
+a: 1
+b:
+    - c
+---
+(xyz)=
+# title
+a
+- b *c* **g**
+    - h
+d
+> +++
+---
+` a `
+```a dfg
+mj
+```
+## a
+
+abc
+===
+
+<c@google.com>
+
+[a][b]
+
+[b]: s
+
+<div>A</div>
+
+a <span>a</span>
+
+![a *A*](b)
+
++++ axbc
+
+{sub}`abc`
+
+
+|a|*b* |
+|-|--: |
+
+% whatever *abc*
+
+$a=1$
+
+$$xyz=3$$
+
+[^foot]: 123
+asdas asdasda
+
+[^foot]
+
+1. 345
+""",
+    env=env,
+)
+
+doc = DocRenderer()
+doc.run_render(tokens, env)
+print(doc.document.pformat())

From 61e69f1fe574722591df718f63228d01f0f425fa Mon Sep 17 00:00:00 2001
From: Chris Sewell <chrisj_sewell@hotmail.com>
Date: Thu, 26 Mar 2020 11:30:09 +0000
Subject: [PATCH 2/4] update

---
 markdown_it/main.py                           |  16 +-
 markdown_it/myst/mocking.py                   | 238 ++++++++++++++++
 markdown_it/myst/parse_directives.py          | 170 ++++++++++++
 .../{_doc_renderer.py => myst/renderer.py}    | 258 ++++++++++++------
 markdown_it/rules_core/block.py               |   2 +-
 try_running_renderer.py                       |  12 +-
 6 files changed, 611 insertions(+), 85 deletions(-)
 create mode 100644 markdown_it/myst/mocking.py
 create mode 100644 markdown_it/myst/parse_directives.py
 rename markdown_it/{_doc_renderer.py => myst/renderer.py} (71%)

diff --git a/markdown_it/main.py b/markdown_it/main.py
index a747f863..b40a166b 100644
--- a/markdown_it/main.py
+++ b/markdown_it/main.py
@@ -1,3 +1,4 @@
+from contextlib import contextmanager
 from typing import Any, Callable, Dict, List, Optional, Union
 
 from . import helpers, presets  # noqa F401
@@ -107,10 +108,12 @@ def configure(self, presets: Union[str, AttrDict]):
 
     def get_active_rules(self) -> Dict[str, List[str]]:
         """Return the names of all active rules."""
-        return {
+        rules = {
             chain: self[chain].ruler.get_active_rules()
             for chain in ["core", "block", "inline"]
         }
+        rules["inline2"] = self.inline.ruler2.get_active_rules()
+        return rules
 
     def enable(
         self, names: Union[str, List[str]], ignoreInvalid: bool = False
@@ -168,6 +171,15 @@ def disable(
             raise ValueError(f"MarkdownIt. Failed to disable unknown rule(s): {missed}")
         return self
 
+    @contextmanager
+    def reset_rules(self):
+        chain_rules = self.get_active_rules()
+        yield
+        for chain, rules in chain_rules.items():
+            if chain != "inline2":
+                self[chain].ruler.enableOnly(rules)
+        self.inline.ruler2.enableOnly(chain_rules["inline2"])
+
     def add_render_rule(self, name: str, function: Callable, fmt="html"):
         """Add a rule for rendering a particular Token type.
 
@@ -243,7 +255,7 @@ def parseInline(self, src: str, env: Optional[AttrDict] = None) -> List[Token]:
             raise TypeError(f"Input data should be an AttrDict, not {type(env)}")
         if not isinstance(src, str):
             raise TypeError(f"Input data should be a string, not {type(src)}")
-        state = self.core.State(src, self, env)
+        state = StateCore(src, self, env)
         state.inlineMode = True
         self.core.process(state)
         return state.tokens
diff --git a/markdown_it/myst/mocking.py b/markdown_it/myst/mocking.py
new file mode 100644
index 00000000..a45c01c8
--- /dev/null
+++ b/markdown_it/myst/mocking.py
@@ -0,0 +1,238 @@
+import re
+import sys
+from typing import List, Optional
+
+from docutils import nodes
+from docutils.parsers.rst.states import Inliner, RSTStateMachine, Body
+from docutils.statemachine import StringList
+
+# from docutils.parsers.rst.directives.misc import Include
+
+
+class MockingError(Exception):
+    """An exception to signal an error during mocking of docutils components."""
+
+
+class MockInliner:
+    """A mock version of `docutils.parsers.rst.states.Inliner`.
+
+    This is passed to role functions.
+    """
+
+    def __init__(self, renderer, lineno: int):
+        self._renderer = renderer
+        self.document = renderer.document
+        self.reporter = renderer.document.reporter
+        if not hasattr(self.reporter, "get_source_and_line"):
+            # TODO this is called by some roles,
+            # but I can't see how that would work in RST?
+            self.reporter.get_source_and_line = lambda l: (self.document["source"], l)
+        self.parent = renderer.current_node
+        self.language = renderer.language_module
+        self.rfc_url = "rfc%d.html"
+
+    def problematic(self, text: str, rawsource: str, message: nodes.system_message):
+        msgid = self.document.set_id(message, self.parent)
+        problematic = nodes.problematic(rawsource, rawsource, refid=msgid)
+        prbid = self.document.set_id(problematic)
+        message.add_backref(prbid)
+        return problematic
+
+    # TODO add parse method
+
+    def __getattr__(self, name):
+        """This method is only called if the attribute requested has not
+        been defined. Defined attributes will not be overridden.
+        """
+        # TODO use document.reporter mechanism?
+        if hasattr(Inliner, name):
+            msg = "{cls} has not yet implemented attribute '{name}'".format(
+                cls=type(self).__name__, name=name
+            )
+            raise MockingError(msg).with_traceback(sys.exc_info()[2])
+        msg = "{cls} has no attribute {name}".format(cls=type(self).__name__, name=name)
+        raise MockingError(msg).with_traceback(sys.exc_info()[2])
+
+
+class MockState:
+    """A mock version of `docutils.parsers.rst.states.RSTState`.
+
+    This is passed to the `Directive.run()` method,
+    so that they may run nested parses on their content that will be parsed as markdown,
+    rather than RST.
+    """
+
+    def __init__(self, renderer, state_machine: "MockStateMachine", lineno: int, token):
+        self._renderer = renderer
+        self._lineno = lineno
+        self._token = token
+        self.document = renderer.document
+        self.state_machine = state_machine
+
+        class Struct:
+            document = self.document
+            reporter = self.document.reporter
+            language = self.document.settings.language_code
+            title_styles = []
+            section_level = max(renderer._level_to_elem)
+            section_bubble_up_kludge = False
+            inliner = MockInliner(renderer, lineno)
+
+        self.memo = Struct
+
+    def nested_parse(
+        self,
+        block: StringList,
+        input_offset: int,
+        node: nodes.Element,
+        match_titles: bool = False,
+        state_machine_class=None,
+        state_machine_kwargs=None,
+    ):
+        current_match_titles = self.state_machine.match_titles
+        self.state_machine.match_titles = match_titles
+        with self._renderer.current_node_context(node):
+            self._renderer.nested_render_text(
+                "\n".join(block), self._lineno + input_offset
+            )
+        self.state_machine.match_titles = current_match_titles
+
+    def inline_text(self, text: str, lineno: int):
+        # TODO return messages?
+        messages = []
+        paragraph = nodes.paragraph("")
+
+        tokens = self._renderer.md.parseInline(text, self._renderer.env)
+        for token in tokens:
+            if token.map:
+                token.map = [token.map[0] + lineno, token.map[1] + lineno]
+        # TODO propagate line numbers to children (make separate function)
+
+        # here we instantiate a new renderer,
+        # so that the nested parse does not affect the current renderer,
+        # but we use the same env, so that link references, etc
+        # are added to the global parse.
+        from .renderer import DocRenderer
+
+        nested_renderer = DocRenderer(
+            self._renderer.md, document=self.document, current_node=paragraph
+        )
+        nested_renderer.run_render(tokens, self._renderer.env)
+        return paragraph.children, messages
+
+    # U+2014 is an em-dash:
+    attribution_pattern = re.compile("^((?:---?(?!-)|\u2014) *)(.+)")
+
+    def block_quote(self, lines: List[str], line_offset: int):
+        """Parse a block quote, which is a block of text,
+        followed by an (optional) attribution.
+
+        ::
+
+           No matter where you go, there you are.
+
+           -- Buckaroo Banzai
+        """
+        elements = []
+        # split attribution
+        last_line_blank = False
+        blockquote_lines = lines
+        attribution_lines = []
+        attribution_line_offset = None
+        # First line after a blank line must begin with a dash
+        for i, line in enumerate(lines):
+            if not line.strip():
+                last_line_blank = True
+                continue
+            if not last_line_blank:
+                last_line_blank = False
+                continue
+            last_line_blank = False
+            match = self.attribution_pattern.match(line)
+            if not match:
+                continue
+            attribution_line_offset = i
+            attribution_lines = [match.group(2)]
+            for at_line in lines[i + 1 :]:
+                indented_line = at_line[len(match.group(1)) :]
+                if len(indented_line) != len(at_line.lstrip()):
+                    break
+                attribution_lines.append(indented_line)
+            blockquote_lines = lines[:i]
+            break
+        # parse block
+        blockquote = nodes.block_quote()
+        self.nested_parse(blockquote_lines, line_offset, blockquote)
+        elements.append(blockquote)
+        # parse attribution
+        if attribution_lines:
+            attribution_text = "\n".join(attribution_lines)
+            lineno = self._lineno + line_offset + attribution_line_offset
+            textnodes, messages = self.inline_text(attribution_text, lineno)
+            attribution = nodes.attribution(attribution_text, "", *textnodes)
+            (
+                attribution.source,
+                attribution.line,
+            ) = self.state_machine.get_source_and_line(lineno)
+            blockquote += attribution
+            elements += messages
+        return elements
+
+    def build_table(self, tabledata, tableline, stub_columns=0, widths=None):
+        return Body.build_table(self, tabledata, tableline, stub_columns, widths)
+
+    def build_table_row(self, rowdata, tableline):
+        return Body.build_table_row(self, rowdata, tableline)
+
+    def __getattr__(self, name):
+        """This method is only called if the attribute requested has not
+        been defined. Defined attributes will not be overridden.
+        """
+        if hasattr(Body, name):
+            msg = "{cls} has not yet implemented attribute '{name}'".format(
+                cls=type(self).__name__, name=name
+            )
+            raise MockingError(msg).with_traceback(sys.exc_info()[2])
+        msg = "{cls} has no attribute {name}".format(cls=type(self).__name__, name=name)
+        raise MockingError(msg).with_traceback(sys.exc_info()[2])
+
+
+class MockStateMachine:
+    """A mock version of `docutils.parsers.rst.states.RSTStateMachine`.
+
+    This is passed to the `Directive.run()` method.
+    """
+
+    def __init__(self, renderer, lineno: int):
+        self._renderer = renderer
+        self._lineno = lineno
+        self.document = renderer.document
+        self.reporter = self.document.reporter
+        self.node = renderer.current_node
+        self.match_titles = True
+
+        # TODO to allow access to attributes like input_lines,
+        # we would need to store the input lines,
+        # probably via the `Document` token,
+        # and maybe self._lines = lines[:], then for AstRenderer,
+        # ignore private attributes
+
+    def get_source(self, lineno: Optional[int] = None):
+        """Return document source path."""
+        return self.document["source"]
+
+    def get_source_and_line(self, lineno: Optional[int] = None):
+        """Return (source path, line) tuple for current or given line number."""
+        return self.document["source"], lineno or self._lineno
+
+    def __getattr__(self, name):
+        """This method is only called if the attribute requested has not
+        been defined. Defined attributes will not be overridden.
+        """
+        if hasattr(RSTStateMachine, name):
+            msg = "{cls} has not yet implemented attribute '{name}'".format(
+                cls=type(self).__name__, name=name
+            )
+            raise MockingError(msg).with_traceback(sys.exc_info()[2])
+        msg = "{cls} has no attribute {name}".format(cls=type(self).__name__, name=name)
+        raise MockingError(msg).with_traceback(sys.exc_info()[2])
diff --git a/markdown_it/myst/parse_directives.py b/markdown_it/myst/parse_directives.py
new file mode 100644
index 00000000..5d0366b9
--- /dev/null
+++ b/markdown_it/myst/parse_directives.py
@@ -0,0 +1,170 @@
+"""Fenced code blocks are parsed as directives,
+if the block starts with ``{directive_name}``,
+followed by arguments on the same line.
+
+Directive options are read from a YAML block,
+if the first content line starts with ``---``, e.g.
+
+::
+
+    ```{directive_name} arguments
+    ---
+    option1: name
+    option2: |
+        Longer text block
+    ---
+    content...
+    ```
+
+Or the option block will be parsed if the first content line starts with ``:``,
+as a YAML block consisting of every line that starts with a ``:``, e.g.
+
+::
+
+    ```{directive_name} arguments
+    :option1: name
+    :option2: other
+
+    content...
+    ```
+
+If the first line of a directive's content is blank, this will be stripped
+from the content.
+This is to allow for separation between the option block and content.
+
+"""
+import re
+from textwrap import dedent
+from typing import Callable, Dict, Type
+
+import yaml
+
+from docutils.parsers.rst import Directive
+from docutils.parsers.rst.directives.misc import TestDirective
+
+
+class DirectiveParsingError(Exception):
+    """Raise on parsing/validation error."""
+
+    pass
+
+
+def parse_directive_text(
+    directive_class: Type[Directive],
+    argument_str: str,
+    content: str,
+    validate_options: bool = True,
+):
+    """Parse (and validate) the full directive text."""
+    if directive_class.option_spec:
+        body, options = parse_directive_options(
+            content, directive_class, validate=validate_options
+        )
+    else:
+        # If there are no possible options, we do not look for a YAML block
+        options = {}
+        body = content
+
+    body_lines = body.splitlines()
+
+    if not (
+        directive_class.required_arguments
+        or directive_class.optional_arguments
+        or options
+    ):
+        # If there are no possible arguments and no option block,
+        # then the body starts on the argument line
+        if argument_str:
+            body_lines.insert(0, argument_str)
+        arguments = []
+    else:
+        arguments = parse_directive_arguments(directive_class, argument_str)
+
+    # remove first line of body if blank
+    # this is to allow space between the options and the content
+    if body_lines and not body_lines[0].strip():
+        body_lines = body_lines[1:]
+
+    # check for body content
+    if body_lines and not directive_class.has_content:
+        raise DirectiveParsingError("No content permitted")
+
+    return arguments, options, body_lines
+
+
+def parse_directive_options(
+    content: str, directive_class: Type[Directive], validate: bool = True
+):
+    """Parse (and validate) the directive option section."""
+    options = {}
+    if content.startswith("---"):
+        content = "\n".join(content.splitlines()[1:])
+        match = re.search(r"^-{3,}", content, re.MULTILINE)
+        if match:
+            yaml_block = content[: match.start()]
+            content = content[match.end() + 1 :]  # TODO advance line number
+        else:
+            yaml_block = content
+            content = ""
+        yaml_block = dedent(yaml_block)
+        try:
+            options = yaml.safe_load(yaml_block) or {}
+        except (yaml.parser.ParserError, yaml.scanner.ScannerError) as error:
+            raise DirectiveParsingError("Invalid options YAML: " + str(error))
+    elif content.lstrip().startswith(":"):
+        content_lines = content.splitlines()  # type: list
+        yaml_lines = []
+        while content_lines:
+            if not content_lines[0].lstrip().startswith(":"):
+                break
+            yaml_lines.append(content_lines.pop(0).lstrip()[1:])
+        yaml_block = "\n".join(yaml_lines)
+        content = "\n".join(content_lines)
+        try:
+            options = yaml.safe_load(yaml_block) or {}
+        except (yaml.parser.ParserError, yaml.scanner.ScannerError) as error:
+            raise DirectiveParsingError("Invalid options YAML: " + str(error))
+
+    if (not validate) or issubclass(directive_class, TestDirective):
+        # technically this directive spec only accepts one option ('option')
+        # but since it's for testing only we accept all options
+        return content, options
+
+    # check options against spec
+    options_spec = directive_class.option_spec  # type: Dict[str, Callable]
+    for name, value in list(options.items()):
+        convertor = options_spec.get(name, None)
+        if convertor is None:
+            raise DirectiveParsingError("Unknown option: {}".format(name))
+        try:
+            converted_value = convertor(value)
+        except (ValueError, TypeError) as error:
+            raise DirectiveParsingError(
+                "Invalid option value: (option: '{}'; value: {})\n{}".format(
+                    name, value, error
+                )
+            )
+        options[name] = converted_value
+
+    return content, options
+
+
+def parse_directive_arguments(directive, arg_text):
+    """Parse (and validate) the directive argument section."""
+    required = directive.required_arguments
+    optional = directive.optional_arguments
+    arguments = arg_text.split()
+    if len(arguments) < required:
+        raise DirectiveParsingError(
+            "{} argument(s) required, {} supplied".format(required, len(arguments))
+        )
+    elif len(arguments) > required + optional:
+        if directive.final_argument_whitespace:
+            arguments = arg_text.split(None, required + optional - 1)
+        else:
+            raise DirectiveParsingError(
+                "maximum {} argument(s) allowed, {} supplied".format(
+                    required + optional, len(arguments)
+                )
+            )
+    return arguments
diff --git a/markdown_it/_doc_renderer.py b/markdown_it/myst/renderer.py
similarity index 71%
rename from markdown_it/_doc_renderer.py
rename to markdown_it/myst/renderer.py
index b3bb1206..7399e35e 100644
--- a/markdown_it/_doc_renderer.py
+++ b/markdown_it/myst/renderer.py
@@ -1,8 +1,7 @@
 """NOTE: this will eventually be moved out of core"""
 from contextlib import contextmanager
 import json
-import sys
-from typing import List
+from typing import List, Optional
 
 import yaml
 
@@ -10,19 +9,19 @@
 from docutils.frontend import OptionParser
 
 from docutils.languages import get_language
-from docutils.parsers.rst import roles  # directives, Directive, DirectiveError, roles
+from docutils.parsers.rst import directives, Directive, DirectiveError, roles
 from docutils.parsers.rst import Parser as RSTParser
-
-# from docutils.parsers.rst.directives.misc import Include
-from docutils.parsers.rst.states import Inliner  # RSTStateMachine, Body
-
-# from docutils.statemachine import StringList
+from docutils.statemachine import StringList
 from docutils.utils import new_document, Reporter
 
+from markdown_it import MarkdownIt
 from markdown_it.token import Token, nest_tokens
 from markdown_it.utils import AttrDict
 from markdown_it.common.utils import escapeHtml
 
+from .mocking import MockInliner, MockState, MockStateMachine, MockingError
+from .parse_directives import parse_directive_text, DirectiveParsingError
+
 
 def make_document(source_path="notset") -> nodes.document:
     """Create a new docutils document."""
@@ -33,17 +32,23 @@ def make_document(source_path="notset") -> nodes.document:
 class DocRenderer:
     __output__ = "docutils"
 
-    def __init__(self, options=None, env=None):
+    def __init__(
+        self,
+        md: MarkdownIt,
+        options=None,
+        document: Optional[nodes.document] = None,
+        current_node: Optional[nodes.Element] = None,
+    ):
+        self.md = md
         self.options = options or {}
-        self.env = env or AttrDict()
         self.rules = {
             k: v
             for k, v in self.__class__.__dict__.items()
             if k.startswith("render_") and k != "render_children"
         }
-        self.document = make_document()
+        self.document = document or make_document()
         self.reporter = self.document.reporter  # type: Reporter
-        self.current_node = self.document
+        self.current_node = current_node or self.document
         self.language_module = self.document.settings.language_code  # type: str
         get_language(self.language_module)
         # TODO merge these with self.env?
@@ -58,8 +63,9 @@ def run_render(self, tokens: List[Token], env: AttrDict):
             containing additional metadata like reference info
         """
         self.env = env
-        last_map = None
+
         # propagate line number down to inline elements
+        last_map = None
         for token in tokens:
             if token.map:
                 last_map = token.map
@@ -67,7 +73,21 @@ def run_render(self, tokens: List[Token], env: AttrDict):
                 token.meta["parent_line"] = last_map[0]
             for child in token.children or []:
                 child.meta["parent_line"] = last_map[0]
+
+        # nest tokens
         tokens = nest_tokens(tokens)
+
+        # move footnote definitions to env
+        self.env["foot_refs"] = []
+        new_tokens = []
+        for token in tokens:
+            if token.type == "footnote_reference_open":
+                self.env["foot_refs"].append(token)
+            else:
+                new_tokens.append(token)
+        tokens = new_tokens
+
+        # render
         for i, token in enumerate(tokens):
             # skip hidden?
             if f"render_{token.type}" in self.rules:
@@ -75,6 +95,22 @@ def run_render(self, tokens: List[Token], env: AttrDict):
             else:
                 print(f"no render method for: {token.type}")
 
+        # TODO log warning for duplicate references
+
+        # add footnotes
+        referenced = {
+            v["label"] for v in self.env.get("footnotes", {}).get("list", {}).values()
+        }
+        # only output referenced
+        foot_refs = [f for f in self.env["foot_refs"] if f.meta["label"] in referenced]
+
+        if foot_refs:
+            self.current_node.append(nodes.transition())
+        for footref in foot_refs:  # TODO sort by referenced
+            self.render_footnote_reference_open(footref)
+
+        return self.document
+
     @contextmanager
     def current_node_context(self, node, append: bool = False):
         """Context manager for temporarily setting the current node."""
@@ -92,6 +128,17 @@ def render_children(self, token):
             else:
                 print(f"no render method for: {child.type}")
 
+    def nested_render_text(self, text: str, lineno: int):
+        """Render unparsed text."""
+        with self.md.reset_rules():
+            self.md.disable("front_matter", True)
+            tokens = self.md.parse(text, self.env)
+        for token in tokens:
+            if token.map:
+                token.map = [token.map[0] + lineno, token.map[1] + lineno]
+        # TODO propagate line numbers to children (make separate function)
+        self.run_render(tokens, self.env)
+
     def add_line_and_source_path(self, node, token):
         """Copy the line number and document source path to the docutils node."""
         try:
@@ -203,7 +250,11 @@ def render_code_inline(self, token):
 
     def render_fence(self, token):
         text = token.content
-        language = token.info.split()[0]
+        language = token.info.split()[0] if token.info else ""
+
+        if language.startswith("{") and language.endswith("}"):
+            return self.render_directive(token)
+
         if not language:
             try:
                 sphinx_env = self.document.settings.env
@@ -301,6 +352,25 @@ def render_front_matter(self, token):
         docinfo = dict_to_docinfo(data)
         self.current_node.append(docinfo)
 
+    # def render_table_open(self, token):
+    #     # print(token)
+    #     # raise
+
+    #     table = nodes.table()
+    #     table["classes"] += ["colwidths-auto"]
+    #     self.add_line_and_source_path(table, token)
+
+    #     thead = nodes.thead()
+    #     # TODO there can never be more than one header row (at least in mardown-it)
+    #     header = token.children[0].children[0]
+    #     for hrow in header.children:
+    #         nodes.t
+    #         style = hrow.attrGet("style")
+
+    #     tgroup = nodes.tgroup(cols)
+    #     table += tgroup
+    #     tgroup += thead
+
     def render_math_inline(self, token):
         content = token.content
         node = nodes.math(content, content)
@@ -378,29 +448,104 @@ def render_myst_role(self, token):
             problematic = inliner.problematic(text, rawsource, message)
             self.current_node += problematic
 
-        # # TODO representing as literal for place-holder
-        # node = nodes.literal(rawsource, rawsource)
-        # self.add_line_and_source_path(node, token)
-        # self.current_node.append(node)
-
-    # def render_table_open(self, token):
-    #     # print(token)
-    #     # raise
+    def render_directive(self, token: Token):
+        """Render special fenced code blocks as directives."""
+        first_line = token.info.split(maxsplit=1)
+        name = first_line[0][1:-1]
+        arguments = "" if len(first_line) == 1 else first_line[1]
+        # TODO directive name white/black lists
+        content = token.content
+        position = token.map[0]
+        self.document.current_line = position
+
+        # get directive class
+        directive_class, messages = directives.directive(
+            name, self.language_module, self.document
+        )  # type: (Directive, list)
+        if not directive_class:
+            error = self.reporter.error(
+                "Unknown directive type '{}'\n".format(name),
+                # nodes.literal_block(content, content),
+                line=position,
+            )
+            self.current_node += [error] + messages
+            return
 
-    #     table = nodes.table()
-    #     table["classes"] += ["colwidths-auto"]
-    #     self.add_line_and_source_path(table, token)
+        try:
+            arguments, options, body_lines = parse_directive_text(
+                directive_class, arguments, content
+            )
+        except DirectiveParsingError as error:
+            error = self.reporter.error(
+                "Directive '{}':\n{}".format(name, error),
+                nodes.literal_block(content, content),
+                line=position,
+            )
+            self.current_node += [error]
+            return
 
-    #     thead = nodes.thead()
-    #     # TODO there can never be more than one header row (at least in mardown-it)
-    #     header = token.children[0].children[0]
-    #     for hrow in header.children:
-    #         nodes.t
-    #         style = hrow.attrGet("style")
+        # initialise directive
+        # TODO Include
+        # if issubclass(directive_class, Include):
+        #     directive_instance = MockIncludeDirective(
+        #         self,
+        #         name=name,
+        #         klass=directive_class,
+        #         arguments=arguments,
+        #         options=options,
+        #         body=body_lines,
+        #         token=token,
+        #     )
+        else:
+            state_machine = MockStateMachine(self, position)
+            state = MockState(self, state_machine, position, token=token)
+            directive_instance = directive_class(
+                name=name,
+                # the list of positional arguments
+                arguments=arguments,
+                # a dictionary mapping option names to values
+                options=options,
+                # the directive content line by line
+                content=StringList(body_lines, self.document["source"]),
+                # the absolute line number of the first line of the directive
+                lineno=position,
+                # the line offset of the first line of the content
+                content_offset=0,  # TODO get content offset from `parse_directive_text`
+                # a string containing the entire directive
+                block_text="\n".join(body_lines),
+                state=state,
+                state_machine=state_machine,
+            )
 
-    #     tgroup = nodes.tgroup(cols)
-    #     table += tgroup
-    #     tgroup += thead
+        # run directive
+        try:
+            result = directive_instance.run()
+        except DirectiveError as error:
+            msg_node = self.reporter.system_message(
+                error.level, error.msg, line=position
+            )
+            msg_node += nodes.literal_block(content, content)
+            result = [msg_node]
+        except MockingError as exc:
+            error = self.reporter.error(
+                "Directive '{}' cannot be mocked:\n{}: {}".format(
+                    name, exc.__class__.__name__, exc
+                ),
+                nodes.literal_block(content, content),
+                line=position,
+            )
+            self.current_node += [error]
+            return
+        assert isinstance(
+            result, list
+        ), 'Directive "{}" must return a list of nodes.'.format(name)
+        for i in range(len(result)):
+            assert isinstance(
+                result[i], nodes.Node
+            ), 'Directive "{}" returned non-Node object (index {}): {}'.format(
+                name, i, result[i]
+            )
+        self.current_node += result
 
 
 def dict_to_docinfo(data):
@@ -418,48 +563,3 @@ def dict_to_docinfo(data):
         field_node += nodes.field_body(value, nodes.Text(value, value))
         docinfo += field_node
     return docinfo
-
-
-class MockingError(Exception):
-    """An exception to signal an error during mocking of docutils components."""
-
-
-class MockInliner:
-    """A mock version of `docutils.parsers.rst.states.Inliner`.
-
-    This is parsed to role functions.
-    """
-
-    def __init__(self, renderer: DocRenderer, lineno: int):
-        self._renderer = renderer
-        self.document = renderer.document
-        self.reporter = renderer.document.reporter
-        if not hasattr(self.reporter, "get_source_and_line"):
-            # TODO this is called by some roles,
-            # but I can't see how that would work in RST?
-            self.reporter.get_source_and_line = lambda l: (self.document["source"], l)
-        self.parent = renderer.current_node
-        self.language = renderer.language_module
-        self.rfc_url = "rfc%d.html"
-
-    def problematic(self, text: str, rawsource: str, message: nodes.system_message):
-        msgid = self.document.set_id(message, self.parent)
-        problematic = nodes.problematic(rawsource, rawsource, refid=msgid)
-        prbid = self.document.set_id(problematic)
-        message.add_backref(prbid)
-        return problematic
-
-    # TODO add parse method
-
-    def __getattr__(self, name):
-        """This method is only be called if the attribute requested has not
-        been defined. Defined attributes will not be overridden.
-        """
-        # TODO use document.reporter mechanism?
-        if hasattr(Inliner, name):
-            msg = "{cls} has not yet implemented attribute '{name}'".format(
-                cls=type(self).__name__, name=name
-            )
-            raise MockingError(msg).with_traceback(sys.exc_info()[2])
-        msg = "{cls} has no attribute {name}".format(cls=type(self).__name__, name=name)
-        raise MockingError(msg).with_traceback(sys.exc_info()[2])
diff --git a/markdown_it/rules_core/block.py b/markdown_it/rules_core/block.py
index 8b991fd4..0fb490b0 100644
--- a/markdown_it/rules_core/block.py
+++ b/markdown_it/rules_core/block.py
@@ -9,6 +9,6 @@ def block(state: StateCore):
         token.content = state.src
         token.map = [0, 1]
         token.children = []
-        state.tokens.push(token)
+        state.tokens.append(token)
     else:
         state.md.block.parse(state.src, state.md, state.env, state.tokens)
diff --git a/try_running_renderer.py b/try_running_renderer.py
index 58a14d30..4bc38eea 100644
--- a/try_running_renderer.py
+++ b/try_running_renderer.py
@@ -5,7 +5,7 @@
 from markdown_it.extensions.myst_role import myst_role_plugin
 from markdown_it.extensions.texmath import texmath_plugin
 from markdown_it.extensions.footnote import footnote_plugin
-from markdown_it._doc_renderer import DocRenderer
+from markdown_it.myst.renderer import DocRenderer
 
 md = (
     MarkdownIt()
@@ -80,10 +80,16 @@
 [^foot]
 
 1. 345
+
+````{note}
+my title
+```{contents} abc
+```
+````
 """,
     env=env,
 )
-
-doc = DocRenderer()
+print(env)
+doc = DocRenderer(md)
 doc.run_render(tokens, env)
 print(doc.document.pformat())

From aae614db5e394072ee1359cb879f4a233062da1b Mon Sep 17 00:00:00 2001
From: Chris Sewell <chrisj_sewell@hotmail.com>
Date: Thu, 26 Mar 2020 12:56:53 +0000
Subject: [PATCH 3/4] add nesting

---
 markdown_it/myst/mocking.py  | 181 ++++++++++++++++++++++++++++++++---
 markdown_it/myst/renderer.py |  95 +++++++++++-------
 try_running_renderer.py      |   5 +-
 3 files changed, 235 insertions(+), 46 deletions(-)

diff --git a/markdown_it/myst/mocking.py b/markdown_it/myst/mocking.py
index a45c01c8..ef6d8d39 100644
--- a/markdown_it/myst/mocking.py
+++ b/markdown_it/myst/mocking.py
@@ -1,13 +1,14 @@
+from pathlib import Path
 import re
 import sys
 from typing import List, Optional
 
 from docutils import nodes
 from docutils.parsers.rst.states import Inliner, RSTStateMachine, Body
+from docutils.parsers.rst import DirectiveError
+from docutils.parsers.rst.directives.misc import Include
 from docutils.statemachine import StringList
 
-# from docutils.parsers.rst.directives.misc import Include
-
 
 class MockingError(Exception):
     """An exception to signal an error during mocking of docutils components."""
@@ -62,10 +63,9 @@ class MockState:
     rather than RST.
     """
 
-    def __init__(self, renderer, state_machine: "MockStateMachine", lineno: int, token):
+    def __init__(self, renderer, state_machine: "MockStateMachine", lineno: int):
         self._renderer = renderer
         self._lineno = lineno
-        self._token = token
         self.document = renderer.document
         self.state_machine = state_machine
 
@@ -106,7 +106,6 @@ def inline_text(self, text: str, lineno: int):
         for token in tokens:
             if token.map:
                 token.map = [token.map[0] + lineno, token.map[1] + lineno]
-        # TODO propagate line numbers to children (make separate function)
 
         # here we instantiate a new renderer,
         # so that the nested parse does not effect the current renderer,
@@ -117,7 +116,7 @@ def inline_text(self, text: str, lineno: int):
         nested_renderer = DocRenderer(
             self._renderer.md, document=self.document, current_node=paragraph
         )
-        nested_renderer.run_render(tokens, self._renderer.env)
+        nested_renderer.run_render(tokens, self._renderer.env, ouput_footnotes=False)
         return paragraph.children, messages
 
     # U+2014 is an em-dash:
@@ -211,12 +210,6 @@ def __init__(self, renderer, lineno: int):
         self.node = renderer.current_node
         self.match_titles = True
 
-        # TODO to allow to access like attributes like input_lines,
-        # we would need to store the input lines,
-        # probably via the `Document` token,
-        # and maybe self._lines = lines[:], then for AstRenderer,
-        # ignore private attributes
-
     def get_source(self, lineno: Optional[int] = None):
         """Return document source path."""
         return self.document["source"]
@@ -236,3 +229,167 @@ def __getattr__(self, name):
             raise MockingError(msg).with_traceback(sys.exc_info()[2])
         msg = "{cls} has no attribute {name}".format(cls=type(self).__name__, name=name)
         raise MockingError(msg).with_traceback(sys.exc_info()[2])
+
+
+class MockIncludeDirective:
+    """This directive uses a lot of statemachine logic that is not yet mocked.
+    Therefore, we treat it as a special case (at least for now).
+
+    See:
+    https://docutils.sourceforge.io/docs/ref/rst/directives.html#including-an-external-document-fragment
+    """
+
+    def __init__(
+        self,
+        renderer,
+        name: str,
+        klass: Include,
+        arguments: list,
+        options: dict,
+        body: List[str],
+        token,
+    ):
+        """Store the parsed directive parts; ``token`` is the fence token."""
+        self.renderer = renderer
+        self.document = renderer.document
+        self.name = name
+        self.klass = klass
+        self.arguments = arguments
+        self.options = options
+        self.body = body
+        self.lineno = token.map[0]  # tokens carry ``map``, not ``position``
+        self.token = token
+
+    def run(self):
+        """Insert the included file (as literal, code, or nested rendered text)."""
+        from docutils.parsers.rst.directives.body import CodeBlock, NumberLines
+
+        if not self.document.settings.file_insertion_enabled:
+            raise DirectiveError(2, 'Directive "{}" disabled.'.format(self.name))
+
+        source_dir = Path(self.document["source"]).absolute().parent
+        include_arg = "".join([s.strip() for s in self.arguments[0].splitlines()])
+
+        if include_arg.startswith("<") and include_arg.endswith(">"):
+            # docutils "standard" includes
+            path = Path(self.klass.standard_include_path).joinpath(include_arg[1:-1])
+        else:
+            # with sphinx, absolute paths are relative to the source directory
+            try:
+                sphinx_env = self.document.settings.env
+                _, include_arg = sphinx_env.relfn2path(self.arguments[0])
+                sphinx_env.note_included(include_arg)
+            except AttributeError:
+                pass
+            path = Path(include_arg)
+        path = source_dir.joinpath(path)
+
+        # read file (NOTE: the "tab-width" option is not applied)
+        encoding = self.options.get("encoding", self.document.settings.input_encoding)
+        error_handler = self.document.settings.input_encoding_error_handler
+        try:
+            file_content = path.read_text(encoding=encoding, errors=error_handler)
+        except Exception as error:
+            raise DirectiveError(
+                4,
+                'Directive "{}": error reading file: {}\n{}.'.format(
+                    self.name, path, error
+                ),
+            )
+
+        # get required section of text
+        startline = self.options.get("start-line", None)
+        endline = self.options.get("end-line", None)
+        file_content = "\n".join(file_content.splitlines()[startline:endline])
+        startline = startline or 0
+        for split_on_type in ["start-after", "end-before"]:
+            split_on = self.options.get(split_on_type, None)
+            if not split_on:
+                continue
+            split_index = file_content.find(split_on)
+            if split_index < 0:
+                raise DirectiveError(
+                    4,
+                    'Directive "{}"; option "{}": text not found "{}".'.format(
+                        self.name, split_on_type, split_on
+                    ),
+                )
+            if split_on_type == "start-after":
+                # advance by the number of lines skipped, not characters
+                startline += file_content.count("\n", 0, split_index + len(split_on))
+                file_content = file_content[split_index + len(split_on) :]
+            else:
+                file_content = file_content[:split_index]
+
+        if "literal" in self.options:
+            literal_block = nodes.literal_block(
+                file_content, source=str(path), classes=self.options.get("class", [])
+            )
+            literal_block.line = 1  # TODO don't think this should be 1?
+            self.add_name(literal_block)
+            if "number-lines" in self.options:
+                try:
+                    startline = int(self.options["number-lines"] or 1)
+                except ValueError:
+                    raise DirectiveError(
+                        3, ":number-lines: with non-integer " "start value"
+                    )
+                endline = startline + len(file_content.splitlines())
+                if file_content.endswith("\n"):
+                    file_content = file_content[:-1]
+                tokens = NumberLines([([], file_content)], startline, endline)
+                for classes, value in tokens:
+                    if classes:
+                        literal_block += nodes.inline(value, value, classes=classes)
+                    else:
+                        literal_block += nodes.Text(value)
+            else:
+                literal_block += nodes.Text(file_content)
+            return [literal_block]
+        if "code" in self.options:
+            self.options["source"] = str(path)
+            state_machine = MockStateMachine(self.renderer, self.lineno)
+            state = MockState(self.renderer, state_machine, self.lineno)
+            codeblock = CodeBlock(
+                name=self.name,
+                arguments=[self.options.pop("code")],
+                options=self.options,
+                content=file_content.splitlines(),
+                lineno=self.lineno,
+                content_offset=0,
+                block_text=file_content,
+                state=state,
+                state_machine=state_machine,
+            )
+            return codeblock.run()
+
+        # Here we perform a nested render, but temporarily setup the document/reporter
+        # with the correct document path and lineno for the included file.
+        source = self.renderer.document["source"]
+        rsource = self.renderer.reporter.source
+        line_func = getattr(self.renderer.reporter, "get_source_and_line", None)
+        try:
+            self.renderer.document["source"] = str(path)
+            self.renderer.reporter.source = str(path)
+            self.renderer.reporter.get_source_and_line = lambda l: (str(path), l)
+            self.renderer.nested_render_text(file_content, startline)
+        finally:
+            self.renderer.document["source"] = source
+            self.renderer.reporter.source = rsource
+            if line_func is not None:
+                self.renderer.reporter.get_source_and_line = line_func
+            else:
+                del self.renderer.reporter.get_source_and_line
+        return []
+
+    def add_name(self, node):
+        """Append self.options['name'] to node['names'] if it exists.
+
+        Also normalize the name string and register it as explicit target.
+        """
+        if "name" in self.options:
+            name = nodes.fully_normalize_name(self.options.pop("name"))
+            if "name" in node:
+                del node["name"]
+            node["names"].append(name)
+            self.renderer.document.note_explicit_target(node, node)
diff --git a/markdown_it/myst/renderer.py b/markdown_it/myst/renderer.py
index 7399e35e..0da8548d 100644
--- a/markdown_it/myst/renderer.py
+++ b/markdown_it/myst/renderer.py
@@ -11,6 +11,7 @@
 from docutils.languages import get_language
 from docutils.parsers.rst import directives, Directive, DirectiveError, roles
 from docutils.parsers.rst import Parser as RSTParser
+from docutils.parsers.rst.directives.misc import Include
 from docutils.statemachine import StringList
 from docutils.utils import new_document, Reporter
 
@@ -19,7 +20,13 @@
 from markdown_it.utils import AttrDict
 from markdown_it.common.utils import escapeHtml
 
-from .mocking import MockInliner, MockState, MockStateMachine, MockingError
+from .mocking import (
+    MockInliner,
+    MockState,
+    MockStateMachine,
+    MockingError,
+    MockIncludeDirective,
+)
 from .parse_directives import parse_directive_text, DirectiveParsingError
 
 
@@ -55,7 +62,7 @@ def __init__(
         self.config = {}
         self._level_to_elem = {0: self.document}
 
-    def run_render(self, tokens: List[Token], env: AttrDict):
+    def run_render(self, tokens: List[Token], env: AttrDict, ouput_footnotes=True):
         """Run the render on a token stream.
 
         :param tokens: the token stream
@@ -65,20 +72,15 @@ def run_render(self, tokens: List[Token], env: AttrDict):
         self.env = env
 
         # propagate line number down to inline elements
-        last_map = None
         for token in tokens:
-            if token.map:
-                last_map = token.map
-            elif last_map:
-                token.meta["parent_line"] = last_map[0]
             for child in token.children or []:
-                child.meta["parent_line"] = last_map[0]
+                child.map = token.map
 
         # nest tokens
         tokens = nest_tokens(tokens)
 
         # move footnote definitions to env
-        self.env["foot_refs"] = []
+        self.env.setdefault("foot_refs", [])
         new_tokens = []
         for token in tokens:
             if token.type == "footnote_reference_open":
@@ -97,6 +99,9 @@ def run_render(self, tokens: List[Token], env: AttrDict):
 
         # TODO log warning for duplicate references
 
+        if not ouput_footnotes:
+            return self.document
+
         # add footnotes
         referenced = {
             v["label"] for v in self.env.get("footnotes", {}).get("list", {}).values()
@@ -111,6 +116,42 @@ def run_render(self, tokens: List[Token], env: AttrDict):
 
         return self.document
 
+    def nested_render_text(self, text: str, lineno: int):
+        """Parse and render raw text, shifting token line numbers by ``lineno``."""
+
+        # parse without front matter
+        with self.md.reset_rules():
+            self.md.disable("front_matter", True)
+            tokens = self.md.parse(text, self.env)
+
+        # offset line numbers, and copy them onto the inline children
+        for token in tokens:
+            if token.map:
+                token.map = [token.map[0] + lineno, token.map[1] + lineno]
+                for child in token.children or []:
+                    child.map = token.map
+
+        # nest tokens
+        tokens = nest_tokens(tokens)
+
+        # move footnote definitions to env (they are not rendered here)
+        self.env.setdefault("foot_refs", [])
+        new_tokens = []
+        for token in tokens:
+            if token.type == "footnote_reference_open":
+                self.env["foot_refs"].append(token)
+            else:
+                new_tokens.append(token)
+        tokens = new_tokens
+
+        # render
+        for i, token in enumerate(tokens):
+            # skip hidden?
+            if f"render_{token.type}" in self.rules:
+                self.rules[f"render_{token.type}"](self, token)
+            else:
+                print(f"no render method for: {token.type}")
+
     @contextmanager
     def current_node_context(self, node, append: bool = False):
         """Context manager for temporarily setting the current node."""
@@ -128,17 +169,6 @@ def render_children(self, token):
             else:
                 print(f"no render method for: {child.type}")
 
-    def nested_render_text(self, text: str, lineno: int):
-        """Render unparsed text."""
-        with self.md.reset_rules():
-            self.md.disable("front_matter", True)
-            tokens = self.md.parse(text, self.env)
-        for token in tokens:
-            if token.map:
-                token.map = [token.map[0] + lineno, token.map[1] + lineno]
-        # TODO propagate line numbers to children (make separate function)
-        self.run_render(tokens, self.env)
-
     def add_line_and_source_path(self, node, token):
         """Copy the line number and document source path to the docutils node."""
         try:
@@ -432,7 +462,7 @@ def render_myst_role(self, token):
         name = token.meta["name"]
         text = escapeHtml(token.content)  # TODO check this
         rawsource = f":{name}:`{token.content}`"
-        lineno = token.meta.get("parent_line", 0)
+        lineno = token.map[0] if token.map else 0
         role_func, messages = roles.role(
             name, self.language_module, lineno, self.reporter
         )
@@ -485,20 +515,19 @@ def render_directive(self, token: Token):
             return
 
         # initialise directive
-        # TODO Include
-        # if issubclass(directive_class, Include):
-        #     directive_instance = MockIncludeDirective(
-        #         self,
-        #         name=name,
-        #         klass=directive_class,
-        #         arguments=arguments,
-        #         options=options,
-        #         body=body_lines,
-        #         token=token,
-        #     )
+        if issubclass(directive_class, Include):
+            directive_instance = MockIncludeDirective(
+                self,
+                name=name,
+                klass=directive_class,
+                arguments=arguments,
+                options=options,
+                body=body_lines,
+                token=token,
+            )
         else:
             state_machine = MockStateMachine(self, position)
-            state = MockState(self, state_machine, position, token=token)
+            state = MockState(self, state_machine, position)
             directive_instance = directive_class(
                 name=name,
                 # the list of positional arguments
diff --git a/try_running_renderer.py b/try_running_renderer.py
index 4bc38eea..6c0a99e1 100644
--- a/try_running_renderer.py
+++ b/try_running_renderer.py
@@ -85,11 +85,14 @@
 my title
 ```{contents} abc
 ```
+[^dfg]
+[^dfg]: xyc
 ````
 """,
     env=env,
 )
-print(env)
+
 doc = DocRenderer(md)
 doc.run_render(tokens, env)
+print(len(env["foot_refs"]))
 print(doc.document.pformat())

From 26a60dd67890ecf59a512233ca6ff1f9bdaefb9b Mon Sep 17 00:00:00 2001
From: Chris Sewell <chrisj_sewell@hotmail.com>
Date: Thu, 26 Mar 2020 13:44:47 +0000
Subject: [PATCH 4/4] add sphinx renderer

---
 .../myst/{renderer.py => doc_renderer.py}     |  68 +++++++-
 markdown_it/myst/mocking.py                   |   4 +-
 markdown_it/myst/sphinx_renderer.py           | 161 ++++++++++++++++++
 try_running_renderer.py                       |   8 +-
 4 files changed, 227 insertions(+), 14 deletions(-)
 rename markdown_it/myst/{renderer.py => doc_renderer.py} (89%)
 create mode 100644 markdown_it/myst/sphinx_renderer.py

diff --git a/markdown_it/myst/renderer.py b/markdown_it/myst/doc_renderer.py
similarity index 89%
rename from markdown_it/myst/renderer.py
rename to markdown_it/myst/doc_renderer.py
index 0da8548d..0201782d 100644
--- a/markdown_it/myst/renderer.py
+++ b/markdown_it/myst/doc_renderer.py
@@ -1,7 +1,10 @@
 """NOTE: this will eventually be moved out of core"""
 from contextlib import contextmanager
+import inspect
 import json
+from os.path import splitext
 from typing import List, Optional
+from urllib.parse import urlparse
 
 import yaml
 
@@ -36,7 +39,7 @@ def make_document(source_path="notset") -> nodes.document:
     return new_document(source_path, settings=settings)
 
 
-class DocRenderer:
+class DocutilsRenderer:
     __output__ = "docutils"
 
     def __init__(
@@ -50,7 +53,7 @@ def __init__(
         self.options = options or {}
         self.rules = {
             k: v
-            for k, v in self.__class__.__dict__.items()
+            for k, v in inspect.getmembers(self, predicate=inspect.ismethod)
             if k.startswith("render_") and k != "render_children"
         }
         self.document = document or make_document()
@@ -93,7 +96,7 @@ def run_render(self, tokens: List[Token], env: AttrDict, ouput_footnotes=True):
         for i, token in enumerate(tokens):
             # skip hidden?
             if f"render_{token.type}" in self.rules:
-                self.rules[f"render_{token.type}"](self, token)
+                self.rules[f"render_{token.type}"](token)
             else:
                 print(f"no render method for: {token.type}")
 
@@ -148,7 +151,7 @@ def nested_render_text(self, text: str, lineno: int):
         for i, token in enumerate(tokens):
             # skip hidden?
             if f"render_{token.type}" in self.rules:
-                self.rules[f"render_{token.type}"](self, token)
+                self.rules[f"render_{token.type}"](token)
             else:
                 print(f"no render method for: {token.type}")
 
@@ -165,7 +168,7 @@ def current_node_context(self, node, append: bool = False):
     def render_children(self, token):
         for i, child in enumerate(token.children or []):
             if f"render_{child.type}" in self.rules:
-                self.rules[f"render_{child.type}"](self, child)
+                self.rules[f"render_{child.type}"](child)
             else:
                 print(f"no render method for: {child.type}")
 
@@ -330,10 +333,57 @@ def render_heading_open(self, token):
         self.current_node = section
 
     def render_link_open(self, token):
-        # TODO I think this is maybe already handled at this point?
-        # refuri = escape_url(http://github.com/token.target)
-        # TODO identify cross-references
-        refuri = target = token.attrGet("href")
+        if token.markup == "autolink":
+            return self.render_autolink(token)
+
+        ref_node = nodes.reference()
+        self.add_line_and_source_path(ref_node, token)
+        # Check destination is supported for cross-linking and remove extension
+        # TODO escape urls?
+        destination = token.attrGet("href")
+        title = token.attrGet("title")
+        _, ext = splitext(destination)
+        # TODO check for other supported extensions, such as those specified in
+        # the Sphinx conf.py file but how to access this information?
+        # TODO this should probably only remove the extension for local paths,
+        # i.e. not URIs starting with http or another external prefix.
+
+        # if ext.replace('.', '') in self.supported:
+        #     destination = destination.replace(ext, '')
+        ref_node["refuri"] = destination
+        print(token)
+        if title:
+            ref_node["title"] = title
+        next_node = ref_node
+
+        url_check = urlparse(destination)
+        # If there's not a url scheme (e.g. 'https' for 'https:...' links),
+        # or there is a scheme but it's not in the list of known_url_schemes,
+        # then assume it's a cross-reference
+        known_url_schemes = self.config.get("known_url_schemes", None)
+        if known_url_schemes:
+            scheme_known = url_check.scheme in known_url_schemes
+        else:
+            scheme_known = bool(url_check.scheme)
+
+        if not url_check.fragment and not scheme_known:
+            self.handle_cross_reference(token, destination)
+        else:
+            self.current_node.append(next_node)
+            with self.current_node_context(ref_node):
+                self.render_children(token)
+
+    def handle_cross_reference(self, token, destination):
+        # TODO use the docutils error reporting mechanisms, rather than raising
+        if not self.config.get("ignore_missing_refs", False):
+            raise NotImplementedError(
+                "reference not found in current document: {} (lines: {})".format(
+                    destination, token.map
+                )
+            )
+
+    def render_autolink(self, token):
+        refuri = target = escapeHtml(token.attrGet("href"))
         ref_node = nodes.reference(target, target, refuri=refuri)
         self.add_line_and_source_path(ref_node, token)
         self.current_node.append(ref_node)
diff --git a/markdown_it/myst/mocking.py b/markdown_it/myst/mocking.py
index ef6d8d39..af045ed7 100644
--- a/markdown_it/myst/mocking.py
+++ b/markdown_it/myst/mocking.py
@@ -111,9 +111,9 @@ def inline_text(self, text: str, lineno: int):
         # so that the nested parse does not effect the current renderer,
         # but we use the same env, so that link references, etc
         # are added to the global parse.
-        from .renderer import DocRenderer
+        from .doc_renderer import DocutilsRenderer
 
-        nested_renderer = DocRenderer(
+        nested_renderer = DocutilsRenderer(
             self._renderer.md, document=self.document, current_node=paragraph
         )
         nested_renderer.run_render(tokens, self._renderer.env, ouput_footnotes=False)
diff --git a/markdown_it/myst/sphinx_renderer.py b/markdown_it/myst/sphinx_renderer.py
new file mode 100644
index 00000000..a16da70a
--- /dev/null
+++ b/markdown_it/myst/sphinx_renderer.py
@@ -0,0 +1,161 @@
+import copy
+from urllib.parse import unquote
+
+from docutils import nodes
+from docutils.parsers.rst import directives, roles
+
+from .doc_renderer import DocutilsRenderer
+
+
+class SphinxRenderer(DocutilsRenderer):
+    """A markdown-it renderer to populate (in-place) a `docutils.document` AST.
+
+    This is a sub-class of `DocutilsRenderer` that handles sphinx cross-referencing.
+    """
+
+    def __init__(self, *args, **kwargs):
+        """Initialise SphinxRenderer
+
+        :param load_sphinx_env: load a basic sphinx environment,
+            when using the renderer as a context manager outside of `sphinx-build`
+        :param sphinx_conf: a dictionary representation of the sphinx `conf.py`
+        :param sphinx_srcdir: a path to a source directory
+          (for example, can be used for `include` statements)
+
+        To use this renderer in a 'standalone' fashion::
+
+            from myst_parser.block_tokens import Document
+
+            with SphinxRenderer(load_sphinx_env=True, sphinx_conf={}) as renderer:
+                renderer.render(Document.read("source text"))
+
+        """
+        self.load_sphinx_env = kwargs.pop("load_sphinx_env", False)
+        self.sphinx_conf = kwargs.pop("sphinx_conf", None)
+        self.sphinx_srcdir = kwargs.pop("sphinx_srcdir", None)
+        super().__init__(*args, **kwargs)
+
+    def handle_cross_reference(self, token, destination):
+        from sphinx import addnodes
+
+        wrap_node = addnodes.pending_xref(
+            reftarget=unquote(destination),
+            reftype="any",
+            refdomain=None,  # Added to enable cross-linking
+            refexplicit=len(token.children) > 0,
+            refwarn=True,
+        )
+        self.add_line_and_source_path(wrap_node, token)
+        title = token.attrGet("title")
+        if title:
+            wrap_node["title"] = title
+        self.current_node.append(wrap_node)
+        text_node = nodes.literal("", "", classes=["xref", "any"])
+        wrap_node.append(text_node)
+        with self.current_node_context(text_node):
+            self.render_children(token)
+
+    def mock_sphinx_env(self, configuration=None, sourcedir=None):
+        """Create a minimal Sphinx environment;
+        loading sphinx roles, directives, etc.
+        """
+        from sphinx.application import builtin_extensions, Sphinx
+        from sphinx.config import Config
+        from sphinx.environment import BuildEnvironment
+        from sphinx.events import EventManager
+        from sphinx.project import Project
+        from sphinx.registry import SphinxComponentRegistry
+        from sphinx.util.tags import Tags
+
+        class MockSphinx(Sphinx):
+            """Minimal sphinx init to load roles and directives."""
+
+            def __init__(self, confoverrides=None, srcdir=None):
+                self.extensions = {}
+                self.registry = SphinxComponentRegistry()
+                self.html_themes = {}
+                self.events = EventManager(self)
+                self.tags = Tags(None)
+                self.config = Config({}, confoverrides or {})
+                self.config.pre_init_values()
+                self._init_i18n()
+                for extension in builtin_extensions:
+                    self.registry.load_extension(self, extension)
+                # fresh env
+                self.doctreedir = None
+                self.srcdir = srcdir
+                self.confdir = None
+                self.outdir = None
+                self.project = Project(srcdir=srcdir, source_suffix=".md")
+                self.project.docnames = ["mock_docname"]
+                self.env = BuildEnvironment()
+                self.env.setup(self)
+                self.env.temp_data["docname"] = "mock_docname"
+                self.builder = None
+
+                if not confoverrides:
+                    return
+
+                # this code is only required for more complex parsing with extensions
+                for extension in self.config.extensions:
+                    self.setup_extension(extension)
+                buildername = "dummy"
+                self.preload_builder(buildername)
+                self.config.init_values()
+                self.events.emit("config-inited", self.config)
+                import tempfile
+
+                with tempfile.TemporaryDirectory() as tempdir:
+                    # creating a builder attempts to make the doctreedir
+                    self.doctreedir = tempdir
+                    self.builder = self.create_builder(buildername)
+                self.doctreedir = None
+
+        app = MockSphinx(confoverrides=configuration, srcdir=sourcedir)
+        self.document.settings.env = app.env
+        return app
+
+    def __enter__(self):
+        """If `load_sphinx_env=True`, we set up an environment,
+        to parse sphinx roles/directives, outside of a `sphinx-build`.
+
+        This primarily copies the code in `sphinx.util.docutils.docutils_namespace`
+        and `sphinx.util.docutils.sphinx_domains`.
+        """
+        if not self.load_sphinx_env:
+            return super().__enter__()
+
+        # store currently loaded roles/directives, so we can revert on exit
+        self._directives = copy.copy(directives._directives)
+        self._roles = copy.copy(roles._roles)
+        # Monkey-patch directive and role dispatch,
+        # so that sphinx domain-specific markup takes precedence.
+        self._env = self.mock_sphinx_env(
+            configuration=self.sphinx_conf, sourcedir=self.sphinx_srcdir
+        ).env
+        from sphinx.util.docutils import sphinx_domains
+
+        self._sphinx_domains = sphinx_domains(self._env)
+        self._sphinx_domains.enable()
+
+        return super().__enter__()
+
+    def __exit__(self, exception_type, exception_val, traceback):
+        if not self.load_sphinx_env:
+            return super().__exit__(exception_type, exception_val, traceback)
+        # revert loaded roles/directives
+        directives._directives = self._directives
+        roles._roles = self._roles
+        self._directives = None
+        self._roles = None
+        # unregister nodes (see `sphinx.util.docutils.docutils_namespace`)
+        from sphinx.util.docutils import additional_nodes, unregister_node
+
+        for node in list(additional_nodes):
+            unregister_node(node)
+            additional_nodes.discard(node)
+        # revert directive/role function (see `sphinx.util.docutils.sphinx_domains`)
+        self._sphinx_domains.disable()
+        self._sphinx_domains = None
+        self._env = None
+        return super().__exit__(exception_type, exception_val, traceback)
diff --git a/try_running_renderer.py b/try_running_renderer.py
index 6c0a99e1..8a92ed22 100644
--- a/try_running_renderer.py
+++ b/try_running_renderer.py
@@ -5,7 +5,7 @@
 from markdown_it.extensions.myst_role import myst_role_plugin
 from markdown_it.extensions.texmath import texmath_plugin
 from markdown_it.extensions.footnote import footnote_plugin
-from markdown_it.myst.renderer import DocRenderer
+from markdown_it.myst.sphinx_renderer import SphinxRenderer
 
 md = (
     MarkdownIt()
@@ -52,7 +52,7 @@
 
 [a][b]
 
-[b]: s
+[b]: s "a"
 
 <div>A</div>
 
@@ -88,11 +88,13 @@
 [^dfg]
 [^dfg]: xyc
 ````
+[dvs]: a
+[dvs]
 """,
     env=env,
 )
 
-doc = DocRenderer(md)
+doc = SphinxRenderer(md)
 doc.run_render(tokens, env)
 print(len(env["foot_refs"]))
 print(doc.document.pformat())