Unverified Commit e214a014 authored by Valentin Gagarin's avatar Valentin Gagarin Committed by GitHub
Browse files

nixos-render-docs: Display relevant source for errors (#416302)

parents be9e2149 014ca1cc
Loading
Loading
Loading
Loading
+88 −28
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@ from .html import HTMLRenderer, UnresolvedXrefError
from .manual_structure import check_structure, FragmentType, is_include, make_xml_id, TocEntry, TocEntryType, XrefTarget
from .md import Converter, Renderer
from .redirects import Redirects
from .src_error import SrcError

class BaseConverter(Converter[md.TR], Generic[md.TR]):
    # per-converter configuration for ns:arg=value arguments to include blocks, following
@@ -44,14 +45,18 @@ class BaseConverter(Converter[md.TR], Generic[md.TR]):
    def _postprocess(self, infile: Path, outfile: Path, tokens: Sequence[Token]) -> None:
        pass

    def _handle_headings(self, tokens: list[Token], *, on_heading: Callable[[Token,str],None]) -> None:
    def _handle_headings(self, tokens: list[Token], *, src: str, on_heading: Callable[[Token,str],None]) -> None:
        # Headings in a globally numbered order
        # h1 to h6
        curr_heading_pos: list[int] = []
        for token in tokens:
            if token.type == "heading_open":
                if token.tag not in ["h1", "h2", "h3", "h4", "h5", "h6"]:
                    raise RuntimeError(f"Got invalid heading tag {token.tag} in line {token.map[0] + 1 if token.map else 'NOT FOUND'}. Only h1 to h6 headings are allowed.")
                    raise SrcError(
                        src=src,
                        description=f"Got invalid heading tag {token.tag!r}. Only h1 to h6 headings are allowed.",
                        token=token,
                    )

                idx = int(token.tag[1:]) - 1

@@ -75,10 +80,10 @@ class BaseConverter(Converter[md.TR], Generic[md.TR]):
                if "id" not in token.attrs:
                    token.attrs["id"] = f"{auto_id_prefix}-{ident}"

            self._handle_headings(tokens, on_heading=set_token_ident)
            self._handle_headings(tokens, src=src, on_heading=set_token_ident)


        check_structure(self._current_type[-1], tokens)
        check_structure(src, self._current_type[-1], tokens)
        for token in tokens:
            if not is_include(token):
                continue
@@ -89,35 +94,46 @@ class BaseConverter(Converter[md.TR], Generic[md.TR]):
            typ = directive[0]
            if typ == 'options':
                token.type = 'included_options'
                self._process_include_args(token, args, self.INCLUDE_OPTIONS_ALLOWED_ARGS)
                self._parse_options(token, args)
                self._process_include_args(src, token, args, self.INCLUDE_OPTIONS_ALLOWED_ARGS)
                self._parse_options(src, token, args)
            else:
                fragment_type = typ.removesuffix('s')
                if fragment_type not in get_args(FragmentType):
                    raise RuntimeError(f"unsupported structural include type '{typ}'")
                    raise SrcError(
                        src=src,
                        description=f"unsupported structural include type {typ!r}",
                        token=token,
                    )
                self._current_type.append(cast(FragmentType, fragment_type))
                token.type = 'included_' + typ
                self._process_include_args(token, args, self.INCLUDE_FRAGMENT_ALLOWED_ARGS)
                self._parse_included_blocks(token, args)
                self._process_include_args(src, token, args, self.INCLUDE_FRAGMENT_ALLOWED_ARGS)
                self._parse_included_blocks(src, token, args)
                self._current_type.pop()
        return tokens

    def _process_include_args(self, token: Token, args: dict[str, str], allowed: set[str]) -> None:
    def _process_include_args(self, src: str, token: Token, args: dict[str, str], allowed: set[str]) -> None:
        ns = self.INCLUDE_ARGS_NS + ":"
        args = { k[len(ns):]: v for k, v in args.items() if k.startswith(ns) }
        if unknown := set(args.keys()) - allowed:
            assert token.map
            raise RuntimeError(f"unrecognized include argument in line {token.map[0] + 1}", unknown)
            raise SrcError(
                src=src,
                description=f"unrecognized include argument(s): {unknown}",
                token=token,
            )
        token.meta['include-args'] = args

    def _parse_included_blocks(self, token: Token, block_args: dict[str, str]) -> None:
    def _parse_included_blocks(self, src: str, token: Token, block_args: dict[str, str]) -> None:
        assert token.map
        included = token.meta['included'] = []
        for (lnum, line) in enumerate(token.content.splitlines(), token.map[0] + 2):
        for (lnum, line) in enumerate(token.content.splitlines(), token.map[0] + 1):
            line = line.strip()
            path = self._base_paths[-1].parent / line
            if path in self._base_paths:
                raise RuntimeError(f"circular include found in line {lnum}")
                raise SrcError(
                    src=src,
                    description="circular include found",
                    token=token,
                )
            try:
                self._base_paths.append(path)
                with open(path, 'r') as f:
@@ -130,29 +146,57 @@ class BaseConverter(Converter[md.TR], Generic[md.TR]):
                    included.append((tokens, path))
                self._base_paths.pop()
            except Exception as e:
                raise RuntimeError(f"processing included file {path} from line {lnum}") from e
                raise SrcError(
                    src=src,
                    description=f"processing included file {path}",
                    token=lnum,
                ) from e

    def _parse_options(self, token: Token, block_args: dict[str, str]) -> None:
    def _parse_options(self, src: str, token: Token, block_args: dict[str, str]) -> None:
        assert token.map

        items = {}
        for (lnum, line) in enumerate(token.content.splitlines(), token.map[0] + 2):
        for (lnum, line) in enumerate(token.content.splitlines(), token.map[0] + 1):
            if len(args := line.split(":", 1)) != 2:
                raise RuntimeError(f"options directive with no argument in line {lnum}")
                raise SrcError(
                    src=src,
                    description=f"options directive with no argument",
                    tokens={
                        "Directive": lnum,
                        "Block": token,
                    },
                )
            (k, v) = (args[0].strip(), args[1].strip())
            if k in items:
                raise RuntimeError(f"duplicate options directive {k} in line {lnum}")
                raise SrcError(
                    src=src,
                    description=f"duplicate options directive {k!r}",
                    tokens={
                        "Directive": lnum,
                        "Block": token,
                    },
                )
            items[k] = v

        try:
            id_prefix = items.pop('id-prefix')
            varlist_id = items.pop('list-id')
            source = items.pop('source')
        except KeyError as e:
            raise RuntimeError(f"options directive {e} missing in block at line {token.map[0] + 1}")
            raise SrcError(
                src=src,
                description=f"options directive {e} missing",
                tokens={
                    "Block": token,
                },
            ) from e

        if items.keys():
            raise RuntimeError(
                f"unsupported options directives in block at line {token.map[0] + 1}",
                " ".join(items.keys()))
            raise SrcError(
                src=src,
                description=f"unsupported options directives: {set(items.keys())}",
                token=token,
            )

        try:
            with open(self._base_paths[-1].parent / source, 'r') as f:
@@ -160,7 +204,11 @@ class BaseConverter(Converter[md.TR], Generic[md.TR]):
                token.meta['list-id'] = varlist_id
                token.meta['source'] = json.load(f)
        except Exception as e:
            raise RuntimeError(f"processing options block in line {token.map[0] + 1}") from e
            raise SrcError(
                src=src,
                description="processing options block",
                token=token,
            ) from e

class RendererMixin(Renderer):
    _toplevel_tag: str
@@ -542,14 +590,26 @@ class HTMLConverter(BaseConverter[ManualHTMLRenderer]):
                continue
            assert token.map
            if len(token.meta['included']) == 0:
                raise RuntimeError(f"redirection target {into} in line {token.map[0] + 1} is empty!")
                raise SrcError(
                    src=src,
                    description=f"redirection target {into!r} is empty!",
                    token=token,
                )
            # we use blender-style //path to denote paths relative to the origin file
            # (usually index.html). this makes everything a lot easier and clearer.
            if not into.startswith("//") or '/' in into[2:]:
                raise RuntimeError("html:into-file must be a relative-to-origin //filename", into)
                raise SrcError(
                    src=src,
                    description=f"html:into-file must be a relative-to-origin //filename: {into}",
                    token=token,
                )
            into = token.meta['include-args']['into-file'] = into[2:]
            if into in self._redirection_targets:
                raise RuntimeError(f"redirection target {into} in line {token.map[0] + 1} is already in use")
                raise SrcError(
                    src=src,
                    description=f"redirection target {into} is already in use",
                    token=token,
                )
            self._redirection_targets.add(into)
        return tokens

+47 −28
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@ from typing import cast, get_args, Iterable, Literal, Sequence
from markdown_it.token import Token

from .utils import Freezeable
from .src_error import SrcError

# FragmentType is used to restrict structural include blocks.
FragmentType = Literal['preface', 'part', 'chapter', 'section', 'appendix']
@@ -21,37 +22,45 @@ def is_include(token: Token) -> bool:

# toplevel file must contain only the title headings and includes, anything else
# would cause strange rendering.
def _check_book_structure(tokens: Sequence[Token]) -> None:
def _check_book_structure(src: str, tokens: Sequence[Token]) -> None:
    for token in tokens[6:]:
        if not is_include(token):
            assert token.map
            raise RuntimeError(f"unexpected content in line {token.map[0] + 1}, "
                               "expected structural include")
            raise SrcError(
                src=src,
                description=f"unexpected content; expected structural include",
                token=token,
            )

# much like books, parts may not contain headings other than their title heading.
# this is a limitation of the current renderers and TOC generators that do not handle
# this case well even though it is supported in docbook (and probably supportable
# anywhere else).
def _check_part_structure(tokens: Sequence[Token]) -> None:
    _check_fragment_structure(tokens)
def _check_part_structure(src: str,tokens: Sequence[Token]) -> None:
    _check_fragment_structure(src, tokens)
    for token in tokens[3:]:
        if token.type == 'heading_open':
            assert token.map
            raise RuntimeError(f"unexpected heading in line {token.map[0] + 1}")
            raise SrcError(
                src=src,
                description="unexpected heading",
                token=token,
            )

# two include blocks must either be adjacent or separated by a heading, otherwise
# we cannot generate a correct TOC (since there'd be nothing to link to between
# the two includes).
def _check_fragment_structure(tokens: Sequence[Token]) -> None:
def _check_fragment_structure(src: str, tokens: Sequence[Token]) -> None:
    for i, token in enumerate(tokens):
        if is_include(token) \
           and i + 1 < len(tokens) \
           and not (is_include(tokens[i + 1]) or tokens[i + 1].type == 'heading_open'):
            assert token.map
            raise RuntimeError(f"unexpected content in line {token.map[0] + 1}, "
                               "expected heading or structural include")
            raise SrcError(
                src=src,
                description="unexpected content; expected heading or structural include",
                token=token,
            )

def check_structure(kind: TocEntryType, tokens: Sequence[Token]) -> None:
def check_structure(src: str, kind: TocEntryType, tokens: Sequence[Token]) -> None:
    wanted = { 'h1': 'title' }
    wanted |= { 'h2': 'subtitle' } if kind == 'book' else {}
    for (i, (tag, role)) in enumerate(wanted.items()):
@@ -59,17 +68,21 @@ def check_structure(kind: TocEntryType, tokens: Sequence[Token]) -> None:
            raise RuntimeError(f"missing {role} ({tag}) heading")
        token = tokens[3 * i]
        if token.type != 'heading_open' or token.tag != tag:
            assert token.map
            raise RuntimeError(f"expected {role} ({tag}) heading in line {token.map[0] + 1}", token)
            raise SrcError(
                src=src,
                description=f"expected {role} ({tag}) heading",
                token=token,
            )
    for t in tokens[3 * len(wanted):]:
        if t.type != 'heading_open' or not (role := wanted.get(t.tag, '')):
            continue
        assert t.map
        raise RuntimeError(
            f"only one {role} heading ({t.markup} [text...]) allowed per "
            f"{kind}, but found a second in line {t.map[0] + 1}. "
            "please remove all such headings except the first or demote the subsequent headings.",
            t)
        raise SrcError(
            src=src,
            description=f"only one {role} heading ({t.markup} [text...]) allowed per "
            f"{kind}, but found a second. "
            "Please remove all such headings except the first or demote the subsequent headings.",
            token=t,
        )

    last_heading_level = 0
    for token in tokens:
@@ -80,22 +93,28 @@ def check_structure(kind: TocEntryType, tokens: Sequence[Token]) -> None:
        # every other headings needs one too. we need this to build a TOC and to
        # provide stable links if the manual changes shape.
        if 'id' not in token.attrs and (kind != 'book' or token.tag != 'h2'):
            assert token.map
            raise RuntimeError(f"heading in line {token.map[0] + 1} does not have an id")
            raise SrcError(
                src=src,
                description=f"heading does not have an id",
                token=token,
            )

        level = int(token.tag[1:]) # because tag = h1..h6
        if level > last_heading_level + 1:
            assert token.map
            raise RuntimeError(f"heading in line {token.map[0] + 1} skips one or more heading levels, "
                               "which is currently not allowed")
            raise SrcError(
                src=src,
                description=f"heading skips one or more heading levels, "
                               "which is currently not allowed",
                token=token,
            )
        last_heading_level = level

    if kind == 'book':
        _check_book_structure(tokens)
        _check_book_structure(src, tokens)
    elif kind == 'part':
        _check_part_structure(tokens)
        _check_part_structure(src, tokens)
    else:
        _check_fragment_structure(tokens)
        _check_fragment_structure(src, tokens)

@dc.dataclass(frozen=True)
class XrefTarget:
+29 −9
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@ import dataclasses
import re

from .types import RenderFn
from .src_error import SrcError

import markdown_it
from markdown_it.token import Token
@@ -446,12 +447,16 @@ def _footnote_ids(md: markdown_it.MarkdownIt) -> None:
       generate here are derived from the footnote label, making numeric footnote
       labels invalid.
    """
    def generate_ids(tokens: Sequence[Token]) -> None:
    def generate_ids(src: str, tokens: Sequence[Token]) -> None:
        for token in tokens:
            if token.type == 'footnote_open':
                if token.meta["label"][:1].isdigit():
                    assert token.map
                    raise RuntimeError(f"invalid footnote label in line {token.map[0] + 1}")
                    raise SrcError(
                        src=src,
                        description="invalid footnote label",
                        token=token,
                    )
                token.attrs['id'] = token.meta["label"]
            elif token.type == 'footnote_anchor':
                token.meta['target'] = f'{token.meta["label"]}.__back.{token.meta["subId"]}'
@@ -460,10 +465,10 @@ def _footnote_ids(md: markdown_it.MarkdownIt) -> None:
                token.meta['target'] = token.meta["label"]
            elif token.type == 'inline':
                assert token.children is not None
                generate_ids(token.children)
                generate_ids(src, token.children)

    def footnote_ids(state: markdown_it.rules_core.StateCore) -> None:
        generate_ids(state.tokens)
        generate_ids(state.src, state.tokens)

    md.core.ruler.after("footnote_tail", "footnote_ids", footnote_ids)

@@ -537,7 +542,7 @@ def _block_titles(block: str) -> Callable[[markdown_it.MarkdownIt], None]:
    non-title heading since those would make toc generation extremely complicated.
    """
    def block_titles(state: markdown_it.rules_core.StateCore) -> None:
        in_example = [False]
        in_example = [None]
        for i, token in enumerate(state.tokens):
            if token.type == open:
                if state.tokens[i + 1].type == 'heading_open':
@@ -545,14 +550,29 @@ def _block_titles(block: str) -> Callable[[markdown_it.MarkdownIt], None]:
                    state.tokens[i + 1].type = title_open
                    state.tokens[i + 3].type = title_close
                else:
                    assert token.map
                    raise RuntimeError(f"found {block} without title in line {token.map[0] + 1}")
                in_example.append(True)
                    raise SrcError(
                        src=state.src,
                        description=f"found {block} without title",
                        token=token,
                    )
                in_example.append(token)
            elif token.type == close:
                in_example.pop()
            elif token.type == 'heading_open' and in_example[-1]:
                assert token.map
                raise RuntimeError(f"unexpected non-title heading in {block} in line {token.map[0] + 1}")
                started_at = in_example[-1]

                block_display = ":::{." + block + "}"

                raise SrcError(
                    description=f"unexpected non-title heading in `{block_display}`; are you missing a `:::`?\n"
                        f"Note: blocks like `{block_display}` are only allowed to contain a single heading in order to simplify TOC generation.",
                    src=state.src,
                    tokens={
                        f"`{block_display}` block": started_at,
                        "Unexpected heading": token,
                    },
                )

    def do_add(md: markdown_it.MarkdownIt) -> None:
        md.core.ruler.push(f"{block}_titles", block_titles)
+154 −0
Original line number Diff line number Diff line
from typing import Tuple

from markdown_it.token import Token

# A source location: a 0-based line number, a 0-based (start, end) pair with an
# exclusive end, or a markdown-it Token (whose `map` attribute supplies the span).
LineSpan = int | Tuple[int, int] | Token


class SrcError(Exception):
    """An error associated with a source file and location."""

    def __init__(
        self,
        *,
        description: str,
        src: str,
        tokens: dict[str, LineSpan] | None = None,
        token: LineSpan | None = None,
    ):
        """Create a new `SrcError`.

        Arguments:
        - `description`: A description of the error.

        - `src`: The source text the `token`s are from.

        - `tokens`: A dictionary from descriptions to `Tokens` (or lines) associated with
          the error.

          The tokens are used for their source location.

          A location like ` at lines 6-9` will be added to the description.

          If the description is empty, the location will be described as `At
          lines 6-9`.

        - `token`: Shorthand for `tokens={"": token}`.
        """
        self.src = src

        # Copy rather than alias: `tokens or {}` would hand us the caller's
        # dict, and adding the shorthand entry below would mutate it.
        tokens = dict(tokens) if tokens else {}
        # `token` may legitimately be 0 (the first source line, since LineSpan
        # line numbers are 0-based), which is falsy — so compare against None
        # explicitly instead of relying on truthiness.
        if token is not None:
            tokens[""] = token
        self.tokens = tokens

        self.description = description

        # Build the message eagerly; Exception wants it available immediately.
        self.message = _src_error_str(src=src, tokens=tokens, description=description)

        super().__init__(self.message)

    def __str__(self) -> str:
        return self.message


def _get_line_span(location: LineSpan) -> Tuple[int, int] | None:
    """Normalize a `LineSpan` to a 0-based `(start, end)` pair (end exclusive).

    Returns `None` when the location is a token without a source map.
    """
    if isinstance(location, Token):
        # Tokens only know their location via their source map.
        return (location.map[0], location.map[1]) if location.map else None
    if isinstance(location, int):
        # A bare line number spans exactly that one line.
        return (location, location + 1)
    # Already a (start, end) pair.
    return location


def _src_error_str(*, src: str, tokens: dict[str, LineSpan], description: str) -> str:
    """Build the full error message for a `SrcError`.

    Kept as a free function because Python exceptions want their `message`
    string attribute right away, so the text has to be produced before
    `__init__` finishes.
    """
    parts = [description]
    lines = src.splitlines()

    for label, location in tokens.items():
        parts.append("\n\n\x1b[33m")

        # A labelled location reads "<label> at ...", an unlabelled one "At ...".
        parts.append(f"{label} at " if label else "At ")

        span = _get_line_span(location)
        if not span:
            parts.append("unknown location\x1b[0m")
            continue

        start, end = span
        # `end` is exclusive, so a single-line span is `(n, n+1)`.
        if end - start == 1:
            parts.append(f"line {start + 1}")
        else:
            parts.append(f"lines {start + 1}-{end}")

        parts.append(":\x1b[0m\n")
        parts.append(src_excerpt(src_lines=lines, start=start, end=end))

    return "".join(parts)


def src_excerpt(
    *, src_lines: list[str], start: int, end: int, context: int = 3, max_lines: int = 20
) -> str:
    """Render lines `[start, end)` of `src_lines` as an ANSI-colored excerpt.

    Highlighted lines get a bold yellow gutter bar, surrounding `context`
    lines a dimmed dotted bar; every line is prefixed with its 1-based line
    number. Spans longer than `max_lines` are abbreviated with an ellipsis
    in the middle.
    """
    rendered: list[str] = []
    total = len(src_lines)

    def emit(first: int, last: int, *, is_context: bool) -> None:
        # Emit the half-open range [first, last), clamped to the file bounds.
        for num in range(max(0, min(total, first)), max(0, min(total, last))):
            # Dimmed, right-aligned line number gutter.
            gutter = "\x1b[2m\x1b[37m" + format(num + 1, " 4d") + "\x1b[0m"
            if is_context:
                # No reset here: context lines stay dimmed.
                gutter += " \x1b[2m\x1b[37m┆ "
            else:
                gutter += " \x1b[1m\x1b[33m┃\x1b[0m "
            rendered.append(gutter + src_lines[num] + "\x1b[0m")

    emit(start - context, start, is_context=True)

    if end - start > max_lines:
        # Too long to show in full: show the head and tail with a `...` between.
        half = max_lines // 2
        emit(start, start + half, is_context=False)
        rendered.append("     \x1b[2m\x1b[37m...\x1b[0m")
        emit(end - half, end, is_context=False)
    else:
        emit(start, end, is_context=False)

    emit(end, end + context, is_context=True)

    return "\n".join(rendered)
+3 −4
Original line number Diff line number Diff line
@@ -3,7 +3,6 @@ from pathlib import Path
from markdown_it.token import Token
from nixos_render_docs.manual import HTMLConverter, HTMLParameters
from nixos_render_docs.md import Converter
from nixos_render_docs.redirects import Redirects

auto_id_prefix="TEST_PREFIX"
def set_prefix(token: Token, ident: str) -> None:
@@ -19,7 +18,7 @@ def test_auto_id_prefix_simple() -> None:
## subtitle
    """
    tokens = Converter()._parse(src)
    md._handle_headings(tokens, on_heading=set_prefix)
    md._handle_headings(tokens, src=src, on_heading=set_prefix)

    assert [
        {**token.attrs, "tag": token.tag}
@@ -44,7 +43,7 @@ def test_auto_id_prefix_repeated() -> None:
## subtitle2
    """
    tokens = Converter()._parse(src)
    md._handle_headings(tokens, on_heading=set_prefix)
    md._handle_headings(tokens, src=src, on_heading=set_prefix)

    assert [
        {**token.attrs, "tag": token.tag}
@@ -76,7 +75,7 @@ def test_auto_id_prefix_maximum_nested() -> None:
## h2.2
    """
    tokens = Converter()._parse(src)
    md._handle_headings(tokens, on_heading=set_prefix)
    md._handle_headings(tokens, src=src, on_heading=set_prefix)

    assert [
        {**token.attrs, "tag": token.tag}
Loading