Unverified Commit 1f7ac8f5 authored by Silvan Mosberger's avatar Silvan Mosberger Committed by GitHub
Browse files

Merge pull request #296384 from pennae/remove-docbook-docs-support

nixos/docs: remove docbook support machinery
parents b2245dab 629cd944
Loading
Loading
Loading
Loading
+1 −15
Original line number Diff line number Diff line
@@ -232,19 +232,5 @@ in rec {
      echo "file json-br $dst/options.json.br" >> $out/nix-support/hydra-build-products
    '';

  optionsDocBook = lib.warn "optionsDocBook is deprecated since 23.11 and will be removed in 24.05"
    (pkgs.runCommand "options-docbook.xml" {
      nativeBuildInputs = [
        pkgs.nixos-render-docs
      ];
    } ''
      nixos-render-docs -j $NIX_BUILD_CORES options docbook \
        --manpage-urls ${pkgs.path + "/doc/manpage-urls.json"} \
        --revision ${lib.escapeShellArg revision} \
        --document-type ${lib.escapeShellArg documentType} \
        --varlist-id ${lib.escapeShellArg variablelistId} \
        --id-prefix ${lib.escapeShellArg optionIdPrefix} \
        ${optionsJSON}/share/doc/nixos/options.json \
        "$out"
    '');
  optionsDocBook = throw "optionsDocBook has been removed in 24.05";
}
+0 −247
Original line number Diff line number Diff line
from collections.abc import Mapping, Sequence
from typing import cast, Optional, NamedTuple

from markdown_it.token import Token
from xml.sax.saxutils import escape, quoteattr

from .md import Renderer

# Characters the docbook toolchain could not tolerate in xml ids; each one is
# mapped to an underscore. Built with str.maketrans instead of a hand-written
# ord() table, producing the identical translation mapping.
_xml_id_translate_table = str.maketrans({bad: '_' for bad in '*< >[]:"'})

def make_xml_id(s: str) -> str:
    """Return *s* with all xml-id-hostile characters replaced by ``_``."""
    return s.translate(_xml_id_translate_table)

class Deflist:
    """Per-definition-list state used while rendering <dl> token streams."""
    # Set True by dd_open once a definition has been rendered for the current
    # term; dd_open raises on a second definition because docbook's
    # varlistentry supports only one listitem per term.
    has_dd = False

class Heading(NamedTuple):
    """A currently-open sectioning element on the renderer's heading stack."""
    # docbook element name used for the container (e.g. "section", "part")
    container_tag: str
    # markdown heading level (1..6) this container was opened for
    level: int
    # special handling for <part> titles: whether partinfo was already closed from elsewhere
    # or still needs closing.
    partintro_closed: bool = False

class DocBookRenderer(Renderer):
    """Render a markdown-it token stream as DocBook 5 XML.

    Open/close token handler pairs that must share state communicate through
    the small stacks below: each *_open handler pushes, the matching *_close
    handler pops.
    """
    _link_tags: list[str]      # tag emitted by link_open ('link' or 'xref'), popped by link_close
    _deflists: list[Deflist]   # one entry per nested definition list
    _headings: list[Heading]   # currently-open sectioning containers, innermost last
    _attrspans: list[str]      # closing text pushed by attr_span_begin, popped by attr_span_end

    def __init__(self, manpage_urls: Mapping[str, str]):
        super().__init__(manpage_urls)
        self._link_tags = []
        self._deflists = []
        self._headings = []
        self._attrspans = []

    def render(self, tokens: Sequence[Token]) -> str:
        """Render a block token stream, closing any still-open section elements at the end."""
        result = super().render(tokens)
        result += self._close_headings(None)
        return result
    def renderInline(self, tokens: Sequence[Token]) -> str:
        # HACK to support docbook links and xrefs. link handling is only necessary because the docbook
        # manpage stylesheet converts - in urls to a mathematical minus, which may be somewhat incorrect.
        for i, token in enumerate(tokens):
            if token.type != 'link_open':
                continue
            token.tag = 'link'
            # turn [](#foo) into xrefs
            if token.attrs['href'][0:1] == '#' and tokens[i + 1].type == 'link_close': # type: ignore[index]
                token.tag = "xref"
            # turn <x> into links without contents
            if tokens[i + 1].type == 'text' and tokens[i + 1].content == token.attrs['href']:
                tokens[i + 1].content = ''

        return super().renderInline(tokens)

    # simple, mostly 1:1 token -> element mappings follow.
    def text(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return escape(token.content)
    def paragraph_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para>"
    def paragraph_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</para>"
    def hardbreak(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<literallayout>\n</literallayout>"
    def softbreak(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # should check options.breaks() and emit hard break if so
        return "\n"
    def code_inline(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return f"<literal>{escape(token.content)}</literal>"
    def code_block(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return f"<programlisting>{escape(token.content)}</programlisting>"
    def link_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # token.tag was set to 'link' or 'xref' by renderInline above; remember
        # which one so link_close emits the matching end tag.
        self._link_tags.append(token.tag)
        href = cast(str, token.attrs['href'])
        # fragment urls become linkend references, everything else xlink:href
        (attr, start) = ('linkend', 1) if href[0] == '#' else ('xlink:href', 0)
        return f"<{token.tag} {attr}={quoteattr(href[start:])}>"
    def link_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return f"</{self._link_tags.pop()}>"
    def list_item_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<listitem>"
    def list_item_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</listitem>\n"
    # HACK open and close para for docbook change size. remove soon.
    def bullet_list_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        spacing = ' spacing="compact"' if token.meta.get('compact', False) else ''
        return f"<para><itemizedlist{spacing}>\n"
    def bullet_list_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "\n</itemizedlist></para>"
    def em_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<emphasis>"
    def em_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</emphasis>"
    def strong_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<emphasis role=\"strong\">"
    def strong_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</emphasis>"
    def fence(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        info = f" language={quoteattr(token.info)}" if token.info != "" else ""
        return f"<programlisting{info}>{escape(token.content)}</programlisting>"
    def blockquote_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><blockquote>"
    def blockquote_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</blockquote></para>"
    def note_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><note>"
    def note_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</note></para>"
    def caution_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><caution>"
    def caution_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</caution></para>"
    def important_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><important>"
    def important_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</important></para>"
    def tip_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><tip>"
    def tip_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</tip></para>"
    def warning_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><warning>"
    def warning_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</warning></para>"
    # markdown-it emits tokens based on the html syntax tree, but docbook is
    # slightly different. html has <dl>{<dt/>{<dd/>}}</dl>,
    # docbook has <variablelist>{<varlistentry><term/><listitem/></varlistentry>}</variablelist>
    # we have to reject multiple definitions for the same term for time being.
    def dl_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._deflists.append(Deflist())
        return "<para><variablelist>"
    def dl_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._deflists.pop()
        return "</variablelist></para>"
    def dt_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # a new term starts: it has no definition yet
        self._deflists[-1].has_dd = False
        return "<varlistentry><term>"
    def dt_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</term>"
    def dd_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        if self._deflists[-1].has_dd:
            raise Exception("multiple definitions per term not supported")
        self._deflists[-1].has_dd = True
        return "<listitem>"
    def dd_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</listitem></varlistentry>"
    def myst_role(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        """Render a MyST role token.

        Supports {command}, {file}, {var}, {env}, {option} and {manpage};
        any other role name raises NotImplementedError.
        """
        if token.meta['name'] == 'command':
            return f"<command>{escape(token.content)}</command>"
        if token.meta['name'] == 'file':
            return f"<filename>{escape(token.content)}</filename>"
        if token.meta['name'] == 'var':
            return f"<varname>{escape(token.content)}</varname>"
        if token.meta['name'] == 'env':
            return f"<envar>{escape(token.content)}</envar>"
        if token.meta['name'] == 'option':
            return f"<option>{escape(token.content)}</option>"
        if token.meta['name'] == 'manpage':
            # split "page(section)" at the last '(' and drop the trailing ')'
            [page, section] = [ s.strip() for s in token.content.rsplit('(', 1) ]
            section = section[:-1]
            man = f"{page}({section})"
            title = f"<refentrytitle>{escape(page)}</refentrytitle>"
            vol = f"<manvolnum>{escape(section)}</manvolnum>"
            ref = f"<citerefentry>{title}{vol}</citerefentry>"
            # link the citation to the manpage url when one is known
            if man in self._manpage_urls:
                return f"<link xlink:href={quoteattr(self._manpage_urls[man])}>{ref}</link>"
            else:
                return ref
        raise NotImplementedError("md node not supported yet", token)
    def attr_span_begin(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # we currently support *only* inline anchors and the special .keycap class to produce
        # <keycap> docbook elements.
        (id_part, class_part) = ("", "")
        if s := token.attrs.get('id'):
            id_part = f'<anchor xml:id={quoteattr(cast(str, s))} />'
        if s := token.attrs.get('class'):
            if s == 'keycap':
                class_part = "<keycap>"
                self._attrspans.append("</keycap>")
            else:
                # unknown classes are delegated to the base class (which may reject them)
                return super().attr_span_begin(token, tokens, i)
        else:
            # nothing to close for a plain anchor span
            self._attrspans.append("")
        return id_part + class_part
    def attr_span_end(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._attrspans.pop()
    def ordered_list_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        start = f' startingnumber="{token.attrs["start"]}"' if 'start' in token.attrs else ""
        spacing = ' spacing="compact"' if token.meta.get('compact', False) else ''
        return f"<orderedlist{start}{spacing}>"
    def ordered_list_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</orderedlist>"
    def heading_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        """Open a new sectioning container, first closing any open containers of equal or deeper level."""
        hlevel = int(token.tag[1:])
        result = self._close_headings(hlevel)
        (tag, attrs) = self._heading_tag(token, tokens, i)
        self._headings.append(Heading(tag, hlevel))
        attrs_str = "".join([ f" {k}={quoteattr(v)}" for k, v in attrs.items() ])
        return result + f'<{tag}{attrs_str}>\n<title>'
    def heading_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        heading = self._headings[-1]
        result = '</title>'
        if heading.container_tag == 'part':
            # generate the same ids as were previously assigned manually. if this collides we
            # rely on outside schema validation to catch it!
            maybe_id = ""
            # tokens[i - 2] is the matching heading_open (the title inline sits between)
            assert tokens[i - 2].type == 'heading_open'
            if id := cast(str, tokens[i - 2].attrs.get('id', "")):
                maybe_id = " xml:id=" + quoteattr(id + "-intro")
            result += f"<partintro{maybe_id}>"
        return result
    def example_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # NOTE(review): with no id attribute this emits '<example >' with a trailing
        # space; the inner conditional only runs when id is truthy, so its else arm is dead.
        if id := cast(str, token.attrs.get('id', '')):
            id = f'xml:id={quoteattr(id)}' if id else ''
        return f'<example {id}>'
    def example_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</example>"
    def example_title_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<title>"
    def example_title_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</title>"

    def _close_headings(self, level: Optional[int]) -> str:
        """Close every open container whose level is >= *level* (all of them if level is None)."""
        # we rely on markdown-it producing h{1..6} tags in token.tag for this to work
        result = []
        while len(self._headings):
            if level is None or self._headings[-1].level >= level:
                heading = self._headings.pop()
                if heading.container_tag == 'part' and not heading.partintro_closed:
                    result.append("</partintro>")
                result.append(f"</{heading.container_tag}>")
            else:
                break
        return "\n".join(result)

    def _heading_tag(self, token: Token, tokens: Sequence[Token], i: int) -> tuple[str, dict[str, str]]:
        """Choose (element name, attributes) for a heading; subclasses override for top-level tags."""
        attrs = {}
        if id := token.attrs.get('id'):
            attrs['xml:id'] = cast(str, id)
        return ("section", attrs)
+9 −92
Original line number Diff line number Diff line
@@ -13,9 +13,8 @@ from typing import Any, Callable, cast, ClassVar, Generic, get_args, NamedTuple
from markdown_it.token import Token

from . import md, options
from .docbook import DocBookRenderer, Heading, make_xml_id
from .html import HTMLRenderer, UnresolvedXrefError
from .manual_structure import check_structure, FragmentType, is_include, TocEntry, TocEntryType, XrefTarget
from .manual_structure import check_structure, FragmentType, is_include, make_xml_id, TocEntry, TocEntryType, XrefTarget
from .md import Converter, Renderer

class BaseConverter(Converter[md.TR], Generic[md.TR]):
@@ -200,74 +199,6 @@ class RendererMixin(Renderer):
    def included_options(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        raise NotImplementedError()

class ManualDocBookRenderer(RendererMixin, DocBookRenderer):
    """DocBook renderer for manual documents: books, structural includes, options lists."""
    def __init__(self, toplevel_tag: str, revision: str, manpage_urls: Mapping[str, str]):
        super().__init__(toplevel_tag, revision, manpage_urls)

    def _render_book(self, tokens: Sequence[Token]) -> str:
        # assumes a fixed token layout: tokens[0] heading_open carrying the book id,
        # tokens[1] the title inline, tokens[4] the subtitle inline, content from
        # tokens[6] — TODO confirm against the structure checker in the caller.
        assert tokens[1].children
        assert tokens[4].children
        if (maybe_id := cast(str, tokens[0].attrs.get('id', ""))):
            maybe_id = "xml:id=" + xml.quoteattr(maybe_id)
        return (f'<book xmlns="http://docbook.org/ns/docbook"'
                f'      xmlns:xlink="http://www.w3.org/1999/xlink"'
                f'      {maybe_id} version="5.0">'
                f'  <title>{self.renderInline(tokens[1].children)}</title>'
                f'  <subtitle>{self.renderInline(tokens[4].children)}</subtitle>'
                # start mro lookup after DocBookRenderer, bypassing its render()
                # wrapper (and thus its automatic heading closing) for the body
                f'  {super(DocBookRenderer, self).render(tokens[6:])}'
                f'</book>')

    def _heading_tag(self, token: Token, tokens: Sequence[Token], i: int) -> tuple[str, dict[str, str]]:
        """Use the configured toplevel tag (with docbook namespaces) for h1, the default otherwise."""
        (tag, attrs) = super()._heading_tag(token, tokens, i)
        # render() has already verified that we don't have supernumerary headings and since the
        # book tag is handled specially we can leave the check this simple
        if token.tag != 'h1':
            return (tag, attrs)
        return (self._toplevel_tag, attrs | {
            'xmlns': "http://docbook.org/ns/docbook",
            'xmlns:xlink': "http://www.w3.org/1999/xlink",
        })

    def _included_thing(self, tag: str, token: Token, tokens: Sequence[Token], i: int) -> str:
        """Render the fragments listed in token.meta['included'], each wrapped by a fresh *tag* renderer."""
        result = []
        # close existing partintro. the generic render doesn't really need this because
        # it doesn't have a concept of structure in the way the manual does.
        if self._headings and self._headings[-1] == Heading('part', 1):
            result.append("</partintro>")
            self._headings[-1] = self._headings[-1]._replace(partintro_closed=True)
        # must nest properly for structural includes. this requires saving at least
        # the headings stack, but creating new renderers is cheap and much easier.
        r = ManualDocBookRenderer(tag, self._revision, self._manpage_urls)
        for (included, path) in token.meta['included']:
            try:
                result.append(r.render(included))
            except Exception as e:
                # attach the source path of the failing include for diagnostics
                raise RuntimeError(f"rendering {path}") from e
        return "".join(result)
    def included_options(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        """Render an included options list as a docbook fragment via the options converter."""
        conv = options.DocBookConverter(self._manpage_urls, self._revision, 'fragment',
                                        token.meta['list-id'], token.meta['id-prefix'])
        conv.add_options(token.meta['source'])
        return conv.finalize(fragment=True)

    # TODO minimize docbook diffs with existing conversions. remove soon.
    def paragraph_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return super().paragraph_open(token, tokens, i) + "\n "
    def paragraph_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "\n" + super().paragraph_close(token, tokens, i)
    def code_block(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return f"<programlisting>\n{xml.escape(token.content)}</programlisting>"
    def fence(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        info = f" language={xml.quoteattr(token.info)}" if token.info != "" else ""
        return f"<programlisting{info}>\n{xml.escape(token.content)}</programlisting>"

class DocBookConverter(BaseConverter[ManualDocBookRenderer]):
    """Converts a manual markdown source tree into a DocBook <book> document."""
    # include directives in sources use the docbook argument namespace
    INCLUDE_ARGS_NS = "docbook"

    def __init__(self, manpage_urls: Mapping[str, str], revision: str):
        super().__init__()
        self._renderer = ManualDocBookRenderer('book', revision, manpage_urls)


class HTMLParameters(NamedTuple):
    generator: str
@@ -457,7 +388,7 @@ class ManualHTMLRenderer(RendererMixin, HTMLRenderer):
                    f' </span>'
                    f'</dt>'
                )
                # we want to look straight through parts because docbook-xsl does too, but it
                # we want to look straight through parts because docbook-xsl did too, but it
                # also makes for more useful top-level tocs.
                next_level = walk_and_emit(child, depth - (0 if child.kind == 'part' else 1))
                if next_level:
@@ -477,7 +408,7 @@ class ManualHTMLRenderer(RendererMixin, HTMLRenderer):
                '</div>'
            )
        # we don't want to generate the "Title of Contents" header for sections,
        # docbook doesn't and it's only distracting clutter unless it's the main table.
        # docbook didn't and it's only distracting clutter unless it's the main table.
        # we also want to generate tocs only for a top-level section (ie, one that is
        # not itself contained in another section)
        print_title = toc.kind != 'section'
@@ -506,12 +437,12 @@ class ManualHTMLRenderer(RendererMixin, HTMLRenderer):
        ])

    def _make_hN(self, level: int) -> tuple[str, str]:
        # for some reason chapters don't increase the hN nesting count in docbook xslts. duplicate
        # this for consistency.
        # for some reason chapters didn't increase the hN nesting count in docbook xslts.
        # originally this was duplicated here for consistency with docbook rendering, but
        # it could be reevaluated and changed now that docbook is gone.
        if self._toplevel_tag == 'chapter':
            level -= 1
        # TODO docbook compat. these are never useful for us, but not having them breaks manual
        # compare workflows while docbook is still allowed.
        # this style setting is also for docbook compatibility only and could well go away.
        style = ""
        if level + self._hlevel_offset < 3 \
           and (self._toplevel_tag == 'section' or (self._toplevel_tag == 'chapter' and level > 0)):
@@ -537,7 +468,7 @@ class ManualHTMLRenderer(RendererMixin, HTMLRenderer):
        if into:
            toc = TocEntry.of(fragments[0][0][0])
            inner.append(self._file_header(toc))
            # we do not set _hlevel_offset=0 because docbook doesn't either.
            # we do not set _hlevel_offset=0 because docbook didn't either.
        else:
            inner = outer
        in_dir = self._in_dir
@@ -742,12 +673,6 @@ class HTMLConverter(BaseConverter[ManualHTMLRenderer]):



def _build_cli_db(p: argparse.ArgumentParser) -> None:
    """Register the docbook subcommand's arguments on parser *p*.

    Required options: --manpage-urls, --revision. Positionals: infile, outfile
    (both parsed as Path).
    """
    for required_opt in ('--manpage-urls', '--revision'):
        p.add_argument(required_opt, required=True)
    for positional in ('infile', 'outfile'):
        p.add_argument(positional, type=Path)

def _build_cli_html(p: argparse.ArgumentParser) -> None:
    p.add_argument('--manpage-urls', required=True)
    p.add_argument('--revision', required=True)
@@ -761,11 +686,6 @@ def _build_cli_html(p: argparse.ArgumentParser) -> None:
    p.add_argument('infile', type=Path)
    p.add_argument('outfile', type=Path)

def _run_cli_db(args: argparse.Namespace) -> None:
    """Entry point of the docbook subcommand: convert args.infile to args.outfile."""
    # renamed local (was `md`): it shadowed the `md` module imported at file top
    with open(args.manpage_urls, 'r') as urls_file:
        urls = json.load(urls_file)
    converter = DocBookConverter(urls, args.revision)
    converter.convert(args.infile, args.outfile)

def _run_cli_html(args: argparse.Namespace) -> None:
    with open(args.manpage_urls, 'r') as manpage_urls:
        md = HTMLConverter(
@@ -777,13 +697,10 @@ def _run_cli_html(args: argparse.Namespace) -> None:

def build_cli(p: argparse.ArgumentParser) -> None:
    """Attach the per-format subcommands (docbook, html) to parser *p*."""
    formats = p.add_subparsers(dest='format', required=True)
    for fmt, setup in (('docbook', _build_cli_db), ('html', _build_cli_html)):
        setup(formats.add_parser(fmt))

def run_cli(args: argparse.Namespace) -> None:
    if args.format == 'docbook':
        _run_cli_db(args)
    elif args.format == 'html':
    if args.format == 'html':
        _run_cli_html(args)
    else:
        raise RuntimeError('format not hooked up', args)
+17 −0
Original line number Diff line number Diff line
@@ -201,3 +201,20 @@ class TocEntry(Freezeable):
        while len(entries) > 1:
            entries[-2][1].children.append(entries.pop()[1])
        return (entries[0][1], examples, figures)

# translation table for make_xml_id: every character the docbook toolchain
# could not tolerate inside an id becomes an underscore.
_xml_id_translate_table = str.maketrans('*< >[]:"', '_' * 8)

# this function is needed to generate option id attributes in the same format as
# the docbook toolchain did to not break existing links. we don't actually use
# xml any more, that's just the legacy we're dealing with and part of our structure
# now.
def make_xml_id(s: str) -> str:
    """Turn *s* into a docbook-compatible id by replacing forbidden characters with ``_``."""
    return s.translate(_xml_id_translate_table)
+2 −123

File changed.

Preview size limit exceeded, changes collapsed.

Loading