Commit 629cd944 authored by pennae's avatar pennae
Browse files

docs/nrd: remove docbook rendering support

it's no longer needed and, if anything, impedes further development of the
tooling through the sheer undecipherability of its reasoning alone. users of the
docbook renderers can still pull nrd from 23.11 to get this support for
the foreseeable future, but since everything we can remember has already moved
away from docbook-like toolchains, that seems unlikely to happen.
parent ec71d0da
Loading
Loading
Loading
Loading
+0 −234
Original line number Diff line number Diff line
from collections.abc import Mapping, Sequence
from typing import cast, Optional, NamedTuple

from markdown_it.token import Token
from xml.sax.saxutils import escape, quoteattr

from .md import Renderer

class Deflist:
    """State for one open definition list (<dl>).

    Tracks whether the term currently being rendered has already received a
    definition; the docbook renderer rejects multiple <dd> per <dt>.
    """
    def __init__(self) -> None:
        self.has_dd = False

class Heading(NamedTuple):
    # One currently-open sectioning element, kept on DocBookRenderer._headings.
    container_tag: str  # docbook element opened for this heading, eg 'section' or 'part'
    level: int  # markdown heading level (1-6), parsed from the hN token tag
    # special handling for <part> titles: whether the <partintro> opened in
    # heading_close was already closed from elsewhere or still needs closing
    # in _close_headings.
    partintro_closed: bool = False

class DocBookRenderer(Renderer):
    """Render a markdown-it token stream to DocBook 5 XML.

    The *_open/*_close callbacks are invoked pairwise by the base Renderer;
    the stacks below carry state from each open callback to its matching
    close callback (and across nested structures).
    """
    _link_tags: list[str]  # tag to close for each open link ('link' or 'xref')
    _deflists: list[Deflist]  # one entry per nested definition list
    _attrspans: list[str]  # closing text pushed by attr_span_begin
    _headings: list[Heading]  # open sectioning elements, outermost first

    def __init__(self, manpage_urls: Mapping[str, str]):
        # manpage_urls maps "name(section)" strings to hyperlink targets.
        super().__init__(manpage_urls)
        self._link_tags = []
        self._deflists = []
        self._headings = []
        self._attrspans = []

    def render(self, tokens: Sequence[Token]) -> str:
        """Render tokens, then close any sectioning elements still open."""
        result = super().render(tokens)
        result += self._close_headings(None)
        return result
    def renderInline(self, tokens: Sequence[Token]) -> str:
        # HACK to support docbook links and xrefs. link handling is only necessary because the docbook
        # manpage stylesheet converts - in urls to a mathematical minus, which may be somewhat incorrect.
        for i, token in enumerate(tokens):
            if token.type != 'link_open':
                continue
            token.tag = 'link'
            # turn [](#foo) into xrefs
            if token.attrs['href'][0:1] == '#' and tokens[i + 1].type == 'link_close': # type: ignore[index]
                token.tag = "xref"
            # turn <x> into links without contents
            if tokens[i + 1].type == 'text' and tokens[i + 1].content == token.attrs['href']:
                tokens[i + 1].content = ''

        return super().renderInline(tokens)

    def text(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return escape(token.content)
    def paragraph_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para>"
    def paragraph_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</para>"
    def hardbreak(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<literallayout>\n</literallayout>"
    def softbreak(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # should check options.breaks() and emit hard break if so
        return "\n"
    def code_inline(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return f"<literal>{escape(token.content)}</literal>"
    def code_block(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return f"<programlisting>{escape(token.content)}</programlisting>"
    def link_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # token.tag was set to 'link' or 'xref' by renderInline above;
        # remember it so link_close emits the matching end tag.
        self._link_tags.append(token.tag)
        href = cast(str, token.attrs['href'])
        # intra-document '#foo' targets become linkend="foo" (fragment marker
        # stripped); everything else is an external xlink:href.
        (attr, start) = ('linkend', 1) if href[0] == '#' else ('xlink:href', 0)
        return f"<{token.tag} {attr}={quoteattr(href[start:])}>"
    def link_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return f"</{self._link_tags.pop()}>"
    def list_item_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<listitem>"
    def list_item_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</listitem>\n"
    # HACK open and close para for docbook change size. remove soon.
    def bullet_list_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        spacing = ' spacing="compact"' if token.meta.get('compact', False) else ''
        return f"<para><itemizedlist{spacing}>\n"
    def bullet_list_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "\n</itemizedlist></para>"
    def em_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<emphasis>"
    def em_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</emphasis>"
    def strong_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<emphasis role=\"strong\">"
    def strong_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</emphasis>"
    def fence(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # fenced code blocks carry an optional info string naming the language
        info = f" language={quoteattr(token.info)}" if token.info != "" else ""
        return f"<programlisting{info}>{escape(token.content)}</programlisting>"
    def blockquote_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><blockquote>"
    def blockquote_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</blockquote></para>"
    def note_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><note>"
    def note_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</note></para>"
    def caution_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><caution>"
    def caution_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</caution></para>"
    def important_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><important>"
    def important_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</important></para>"
    def tip_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><tip>"
    def tip_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</tip></para>"
    def warning_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><warning>"
    def warning_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</warning></para>"
    # markdown-it emits tokens based on the html syntax tree, but docbook is
    # slightly different. html has <dl>{<dt/>{<dd/>}}</dl>,
    # docbook has <variablelist>{<varlistentry><term/><listitem/></varlistentry>}</variablelist>
    # we have to reject multiple definitions for the same term for time being.
    def dl_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._deflists.append(Deflist())
        return "<para><variablelist>"
    def dl_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._deflists.pop()
        return "</variablelist></para>"
    def dt_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # a new term resets the "already defined" flag for the innermost list
        self._deflists[-1].has_dd = False
        return "<varlistentry><term>"
    def dt_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</term>"
    def dd_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # see class-level note: docbook's varlistentry allows only one listitem
        # per term the way we emit it, so a second <dd> is an input error.
        if self._deflists[-1].has_dd:
            raise Exception("multiple definitions per term not supported")
        self._deflists[-1].has_dd = True
        return "<listitem>"
    def dd_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</listitem></varlistentry>"
    def myst_role(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        """Render a MyST role ({command}`..`, {file}`..`, etc) to its docbook element.

        {manpage}`page(section)` becomes a citerefentry, wrapped in a link when
        the page is found in the manpage_urls mapping given at construction.
        Unknown roles raise NotImplementedError.
        """
        if token.meta['name'] == 'command':
            return f"<command>{escape(token.content)}</command>"
        if token.meta['name'] == 'file':
            return f"<filename>{escape(token.content)}</filename>"
        if token.meta['name'] == 'var':
            return f"<varname>{escape(token.content)}</varname>"
        if token.meta['name'] == 'env':
            return f"<envar>{escape(token.content)}</envar>"
        if token.meta['name'] == 'option':
            return f"<option>{escape(token.content)}</option>"
        if token.meta['name'] == 'manpage':
            # split "page(section)" at the last '(' and drop the trailing ')'
            [page, section] = [ s.strip() for s in token.content.rsplit('(', 1) ]
            section = section[:-1]
            man = f"{page}({section})"
            title = f"<refentrytitle>{escape(page)}</refentrytitle>"
            vol = f"<manvolnum>{escape(section)}</manvolnum>"
            ref = f"<citerefentry>{title}{vol}</citerefentry>"
            if man in self._manpage_urls:
                return f"<link xlink:href={quoteattr(self._manpage_urls[man])}>{ref}</link>"
            else:
                return ref
        raise NotImplementedError("md node not supported yet", token)
    def attr_span_begin(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # we currently support *only* inline anchors and the special .keycap class to produce
        # <keycap> docbook elements.
        (id_part, class_part) = ("", "")
        if s := token.attrs.get('id'):
            id_part = f'<anchor xml:id={quoteattr(cast(str, s))} />'
        if s := token.attrs.get('class'):
            if s == 'keycap':
                class_part = "<keycap>"
                self._attrspans.append("</keycap>")
            else:
                # unsupported class: defer to the base renderer (which rejects it)
                return super().attr_span_begin(token, tokens, i)
        else:
            # nothing to close later, but attr_span_end will still pop
            self._attrspans.append("")
        return id_part + class_part
    def attr_span_end(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._attrspans.pop()
    def ordered_list_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # a markdown list starting at a number other than 1 carries a 'start' attr
        start = f' startingnumber="{token.attrs["start"]}"' if 'start' in token.attrs else ""
        spacing = ' spacing="compact"' if token.meta.get('compact', False) else ''
        return f"<orderedlist{start}{spacing}>"
    def ordered_list_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</orderedlist>"
    def heading_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        """Open a new sectioning element, first closing any open sections of
        the same or deeper level so the docbook structure nests properly."""
        # we rely on markdown-it producing h1..h6 in token.tag here
        hlevel = int(token.tag[1:])
        result = self._close_headings(hlevel)
        (tag, attrs) = self._heading_tag(token, tokens, i)
        self._headings.append(Heading(tag, hlevel))
        attrs_str = "".join([ f" {k}={quoteattr(v)}" for k, v in attrs.items() ])
        return result + f'<{tag}{attrs_str}>\n<title>'
    def heading_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        """Close the <title>; for <part> also open the mandatory <partintro>."""
        heading = self._headings[-1]
        result = '</title>'
        if heading.container_tag == 'part':
            # generate the same ids as were previously assigned manually. if this collides we
            # rely on outside schema validation to catch it!
            maybe_id = ""
            # tokens[i - 2] is the heading_open matching this close (the inline
            # title token sits between them)
            assert tokens[i - 2].type == 'heading_open'
            if id := cast(str, tokens[i - 2].attrs.get('id', "")):
                maybe_id = " xml:id=" + quoteattr(id + "-intro")
            result += f"<partintro{maybe_id}>"
        return result
    def example_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # NOTE(review): the inner `if id` is redundant (the walrus condition
        # already ensured id is truthy), and without an id this emits
        # "<example >" with a trailing space -- still well-formed XML.
        if id := cast(str, token.attrs.get('id', '')):
            id = f'xml:id={quoteattr(id)}' if id else ''
        return f'<example {id}>'
    def example_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</example>"
    def example_title_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<title>"
    def example_title_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</title>"

    def _close_headings(self, level: Optional[int]) -> str:
        """Close all open sectioning elements at or below *level*.

        level=None (used by render at end of input) closes everything.
        Unclosed <partintro>s are closed before their containing <part>.
        """
        # we rely on markdown-it producing h{1..6} tags in token.tag for this to work
        result = []
        while len(self._headings):
            if level is None or self._headings[-1].level >= level:
                heading = self._headings.pop()
                if heading.container_tag == 'part' and not heading.partintro_closed:
                    result.append("</partintro>")
                result.append(f"</{heading.container_tag}>")
            else:
                break
        return "\n".join(result)

    def _heading_tag(self, token: Token, tokens: Sequence[Token], i: int) -> tuple[str, dict[str, str]]:
        """Pick the container tag and attributes for a heading.

        Base implementation always uses <section>; subclasses override to emit
        book/part/chapter at the top level.
        """
        attrs = {}
        if id := token.attrs.get('id'):
            attrs['xml:id'] = cast(str, id)
        return ("section", attrs)
+8 −91
Original line number Diff line number Diff line
@@ -13,7 +13,6 @@ from typing import Any, Callable, cast, ClassVar, Generic, get_args, NamedTuple
from markdown_it.token import Token

from . import md, options
from .docbook import DocBookRenderer, Heading
from .html import HTMLRenderer, UnresolvedXrefError
from .manual_structure import check_structure, FragmentType, is_include, make_xml_id, TocEntry, TocEntryType, XrefTarget
from .md import Converter, Renderer
@@ -200,74 +199,6 @@ class RendererMixin(Renderer):
    def included_options(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        raise NotImplementedError()

class ManualDocBookRenderer(RendererMixin, DocBookRenderer):
    """DocBookRenderer specialized for the NixOS manual's structured fragments."""

    def __init__(self, toplevel_tag: str, revision: str, manpage_urls: Mapping[str, str]):
        # toplevel_tag is the docbook element emitted for the fragment's h1
        # (eg 'book', 'part', 'chapter'); handled by the RendererMixin chain.
        super().__init__(toplevel_tag, revision, manpage_urls)

    def _render_book(self, tokens: Sequence[Token]) -> str:
        """Render a whole book: tokens[1] is the title inline, tokens[4] the
        subtitle inline, tokens[6:] the body (fixed positions by construction)."""
        assert tokens[1].children
        assert tokens[4].children
        if (maybe_id := cast(str, tokens[0].attrs.get('id', ""))):
            maybe_id = "xml:id=" + xml.quoteattr(maybe_id)
        # skip DocBookRenderer.render via super(DocBookRenderer, self) so the
        # book body is rendered without the extra heading bookkeeping
        return (f'<book xmlns="http://docbook.org/ns/docbook"'
                f'      xmlns:xlink="http://www.w3.org/1999/xlink"'
                f'      {maybe_id} version="5.0">'
                f'  <title>{self.renderInline(tokens[1].children)}</title>'
                f'  <subtitle>{self.renderInline(tokens[4].children)}</subtitle>'
                f'  {super(DocBookRenderer, self).render(tokens[6:])}'
                f'</book>')

    def _heading_tag(self, token: Token, tokens: Sequence[Token], i: int) -> tuple[str, dict[str, str]]:
        (tag, attrs) = super()._heading_tag(token, tokens, i)
        # render() has already verified that we don't have supernumerary headings and since the
        # book tag is handled specially we can leave the check this simple
        if token.tag != 'h1':
            return (tag, attrs)
        # the fragment's single h1 becomes the configured toplevel element and
        # carries the docbook namespaces
        return (self._toplevel_tag, attrs | {
            'xmlns': "http://docbook.org/ns/docbook",
            'xmlns:xlink': "http://www.w3.org/1999/xlink",
        })

    def _included_thing(self, tag: str, token: Token, tokens: Sequence[Token], i: int) -> str:
        """Render an include directive by recursively rendering each included
        file with a fresh renderer whose toplevel tag is *tag*."""
        result = []
        # close existing partintro. the generic render doesn't really need this because
        # it doesn't have a concept of structure in the way the manual does.
        if self._headings and self._headings[-1] == Heading('part', 1):
            result.append("</partintro>")
            self._headings[-1] = self._headings[-1]._replace(partintro_closed=True)
        # must nest properly for structural includes. this requires saving at least
        # the headings stack, but creating new renderers is cheap and much easier.
        r = ManualDocBookRenderer(tag, self._revision, self._manpage_urls)
        for (included, path) in token.meta['included']:
            try:
                result.append(r.render(included))
            except Exception as e:
                # wrap so the failing file's path surfaces in the traceback
                raise RuntimeError(f"rendering {path}") from e
        return "".join(result)
    def included_options(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # delegate options-list rendering to the options module's converter
        conv = options.DocBookConverter(self._manpage_urls, self._revision, 'fragment',
                                        token.meta['list-id'], token.meta['id-prefix'])
        conv.add_options(token.meta['source'])
        return conv.finalize(fragment=True)

    # TODO minimize docbook diffs with existing conversions. remove soon.
    def paragraph_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return super().paragraph_open(token, tokens, i) + "\n "
    def paragraph_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "\n" + super().paragraph_close(token, tokens, i)
    def code_block(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return f"<programlisting>\n{xml.escape(token.content)}</programlisting>"
    def fence(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        info = f" language={xml.quoteattr(token.info)}" if token.info != "" else ""
        return f"<programlisting{info}>\n{xml.escape(token.content)}</programlisting>"

class DocBookConverter(BaseConverter[ManualDocBookRenderer]):
    """Converter producing docbook output for the whole manual (toplevel <book>)."""
    # namespace under which include-directive arguments are looked up
    INCLUDE_ARGS_NS = "docbook"

    def __init__(self, manpage_urls: Mapping[str, str], revision: str):
        super().__init__()
        self._renderer = ManualDocBookRenderer('book', revision, manpage_urls)


class HTMLParameters(NamedTuple):
    generator: str
@@ -457,7 +388,7 @@ class ManualHTMLRenderer(RendererMixin, HTMLRenderer):
                    f' </span>'
                    f'</dt>'
                )
                # we want to look straight through parts because docbook-xsl does too, but it
                # we want to look straight through parts because docbook-xsl did too, but it
                # also makes for more useful top-level tocs.
                next_level = walk_and_emit(child, depth - (0 if child.kind == 'part' else 1))
                if next_level:
@@ -477,7 +408,7 @@ class ManualHTMLRenderer(RendererMixin, HTMLRenderer):
                '</div>'
            )
        # we don't want to generate the "Title of Contents" header for sections,
        # docbook doesn't and it's only distracting clutter unless it's the main table.
        # docbook didn't and it's only distracting clutter unless it's the main table.
        # we also want to generate tocs only for a top-level section (ie, one that is
        # not itself contained in another section)
        print_title = toc.kind != 'section'
@@ -506,12 +437,12 @@ class ManualHTMLRenderer(RendererMixin, HTMLRenderer):
        ])

    def _make_hN(self, level: int) -> tuple[str, str]:
        # for some reason chapters don't increase the hN nesting count in docbook xslts. duplicate
        # this for consistency.
        # for some reason chapters didn't increase the hN nesting count in docbook xslts.
        # originally this was duplicated here for consistency with docbook rendering, but
        # it could be reevaluated and changed now that docbook is gone.
        if self._toplevel_tag == 'chapter':
            level -= 1
        # TODO docbook compat. these are never useful for us, but not having them breaks manual
        # compare workflows while docbook is still allowed.
        # this style setting is also for docbook compatibility only and could well go away.
        style = ""
        if level + self._hlevel_offset < 3 \
           and (self._toplevel_tag == 'section' or (self._toplevel_tag == 'chapter' and level > 0)):
@@ -537,7 +468,7 @@ class ManualHTMLRenderer(RendererMixin, HTMLRenderer):
        if into:
            toc = TocEntry.of(fragments[0][0][0])
            inner.append(self._file_header(toc))
            # we do not set _hlevel_offset=0 because docbook doesn't either.
            # we do not set _hlevel_offset=0 because docbook didn't either.
        else:
            inner = outer
        in_dir = self._in_dir
@@ -742,12 +673,6 @@ class HTMLConverter(BaseConverter[ManualHTMLRenderer]):



def _build_cli_db(p: argparse.ArgumentParser) -> None:
    """Register the arguments of the ``docbook`` subcommand on *p*.

    Required flags: --manpage-urls (JSON mapping file) and --revision.
    Positionals: infile and outfile, parsed as Paths.
    """
    for flag in ('--manpage-urls', '--revision'):
        p.add_argument(flag, required=True)
    for positional in ('infile', 'outfile'):
        p.add_argument(positional, type=Path)

def _build_cli_html(p: argparse.ArgumentParser) -> None:
    p.add_argument('--manpage-urls', required=True)
    p.add_argument('--revision', required=True)
@@ -761,11 +686,6 @@ def _build_cli_html(p: argparse.ArgumentParser) -> None:
    p.add_argument('infile', type=Path)
    p.add_argument('outfile', type=Path)

def _run_cli_db(args: argparse.Namespace) -> None:
    """Entry point of the ``docbook`` subcommand: convert infile to outfile
    using the manpage-url mapping loaded from args.manpage_urls."""
    with open(args.manpage_urls, 'r') as urls_file:
        manpage_urls = json.load(urls_file)
    md = DocBookConverter(manpage_urls, args.revision)
    md.convert(args.infile, args.outfile)

def _run_cli_html(args: argparse.Namespace) -> None:
    with open(args.manpage_urls, 'r') as manpage_urls:
        md = HTMLConverter(
@@ -777,13 +697,10 @@ def _run_cli_html(args: argparse.Namespace) -> None:

def build_cli(p: argparse.ArgumentParser) -> None:
    # one subcommand per output format; dest='format' is what run_cli dispatches on
    formats = p.add_subparsers(dest='format', required=True)
    _build_cli_db(formats.add_parser('docbook'))
    _build_cli_html(formats.add_parser('html'))

def run_cli(args: argparse.Namespace) -> None:
    if args.format == 'docbook':
        _run_cli_db(args)
    elif args.format == 'html':
    if args.format == 'html':
        _run_cli_html(args)
    else:
        raise RuntimeError('format not hooked up', args)
+1 −122

File changed.

Preview size limit exceeded, changes collapsed.

+2 −2
Original line number Diff line number Diff line
@@ -2,11 +2,11 @@ import nixos_render_docs as nrd

from markdown_it.token import Token

class Converter(nrd.md.Converter[nrd.docbook.DocBookRenderer]):
class Converter(nrd.md.Converter[nrd.html.HTMLRenderer]):
    # actual renderer doesn't matter, we're just parsing.
    def __init__(self, manpage_urls: dict[str, str]) -> None:
        super().__init__()
        self._renderer = nrd.docbook.DocBookRenderer(manpage_urls)
        self._renderer = nrd.html.HTMLRenderer(manpage_urls, {})

def test_heading_id_absent() -> None:
    c = Converter({})
+2 −2
Original line number Diff line number Diff line
@@ -3,11 +3,11 @@ import pytest

from markdown_it.token import Token

class Converter(nrd.md.Converter[nrd.docbook.DocBookRenderer]):
class Converter(nrd.md.Converter[nrd.html.HTMLRenderer]):
    # actual renderer doesn't matter, we're just parsing.
    def __init__(self, manpage_urls: dict[str, str]) -> None:
        super().__init__()
        self._renderer = nrd.docbook.DocBookRenderer(manpage_urls)
        self._renderer = nrd.html.HTMLRenderer(manpage_urls, {})

@pytest.mark.parametrize("ordered", [True, False])
def test_list_wide(ordered: bool) -> None:
Loading