Unverified Commit 014ca1cc authored by Rebecca Turner's avatar Rebecca Turner
Browse files

nixos-render-docs: Display relevant source for errors

Most of the error messages in `nixos-render-docs` produce line numbers,
which would be really helpful if the sources being rendered weren't
often generated from `.nix` files themselves!

While the Python `markdown_it` library doesn't give us an obvious way to
track source provenance like this (i.e., mapping source spans in the
generated files to source spans in the input files), we can make the
errors a lot clearer by displaying the relevant lines from the source.

Below, an error message caused by a missing `:::` is made obvious by
displaying the span that composes the block being rendered, rather than
pointing only at the offending title.

Before:

```
caused by:
        unexpected non-title heading in example in line 87
```

After:

```
caused by:
unexpected non-title heading in `:::{.example}`; are you missing a `:::`?
Note: blocks like `:::{.example}` are only allowed to contain a single heading in order to simplify TOC generation.

`:::{.example}` block at lines 76-112:
  73 ┆ ```
  74 ┆
  75 ┆ ### Examples
  76 ┃ :::{.example}
  77 ┃ #### `lib.debug.traceValFn` usage example
  78 ┃
  79 ┃ ```nix
  80 ┃ traceValFn (v: "mystring ${v}") "foo"
  81 ┃ trace: mystring foo
  82 ┃ => "foo"
  83 ┃ ```
  84 ┃
  85 ┃ Located at [lib/debug.nix:106](https://github.com/NixOS/nixpkgs/blob/master/lib/debug.nix#L106) in `<nixpkgs>`.
     ...
 103 ┃ ### Examples
 104 ┃ :::{.example}
 105 ┃ #### `lib.debug.traceVal` usage example
 106 ┃
 107 ┃ ```nix
 108 ┃ traceVal 42
 109 ┃ # trace: 42
 110 ┃ => 42
 111 ┃ ```
 112 ┃
 113 ┆ :::
 114 ┆
 115 ┆ Located at [lib/debug.nix:135](https://github.com/NixOS/nixpkgs/blob/master/lib/debug.nix#L135) in `<nixpkgs>`.

Unexpected heading at line 87:
  84 ┆
  85 ┆ Located at [lib/debug.nix:106](https://github.com/NixOS/nixpkgs/blob/master/lib/debug.nix#L106) in `<nixpkgs>`.
  86 ┆
  87 ┃ ## `lib.debug.traceVal` {#function-library-lib.debug.traceVal}
  88 ┆
  89 ┆ Trace the supplied value and return it.
  90 ┆
```
parent 599ee121
Loading
Loading
Loading
Loading
+88 −28
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@ from .html import HTMLRenderer, UnresolvedXrefError
from .manual_structure import check_structure, FragmentType, is_include, make_xml_id, TocEntry, TocEntryType, XrefTarget
from .md import Converter, Renderer
from .redirects import Redirects
from .src_error import SrcError

class BaseConverter(Converter[md.TR], Generic[md.TR]):
    # per-converter configuration for ns:arg=value arguments to include blocks, following
@@ -44,14 +45,18 @@ class BaseConverter(Converter[md.TR], Generic[md.TR]):
    def _postprocess(self, infile: Path, outfile: Path, tokens: Sequence[Token]) -> None:
        pass

    def _handle_headings(self, tokens: list[Token], *, on_heading: Callable[[Token,str],None]) -> None:
    def _handle_headings(self, tokens: list[Token], *, src: str, on_heading: Callable[[Token,str],None]) -> None:
        # Headings in a globally numbered order
        # h1 to h6
        curr_heading_pos: list[int] = []
        for token in tokens:
            if token.type == "heading_open":
                if token.tag not in ["h1", "h2", "h3", "h4", "h5", "h6"]:
                    raise RuntimeError(f"Got invalid heading tag {token.tag} in line {token.map[0] + 1 if token.map else 'NOT FOUND'}. Only h1 to h6 headings are allowed.")
                    raise SrcError(
                        src=src,
                        description=f"Got invalid heading tag {token.tag!r}. Only h1 to h6 headings are allowed.",
                        token=token,
                    )

                idx = int(token.tag[1:]) - 1

@@ -75,10 +80,10 @@ class BaseConverter(Converter[md.TR], Generic[md.TR]):
                if "id" not in token.attrs:
                    token.attrs["id"] = f"{auto_id_prefix}-{ident}"

            self._handle_headings(tokens, on_heading=set_token_ident)
            self._handle_headings(tokens, src=src, on_heading=set_token_ident)


        check_structure(self._current_type[-1], tokens)
        check_structure(src, self._current_type[-1], tokens)
        for token in tokens:
            if not is_include(token):
                continue
@@ -89,35 +94,46 @@ class BaseConverter(Converter[md.TR], Generic[md.TR]):
            typ = directive[0]
            if typ == 'options':
                token.type = 'included_options'
                self._process_include_args(token, args, self.INCLUDE_OPTIONS_ALLOWED_ARGS)
                self._parse_options(token, args)
                self._process_include_args(src, token, args, self.INCLUDE_OPTIONS_ALLOWED_ARGS)
                self._parse_options(src, token, args)
            else:
                fragment_type = typ.removesuffix('s')
                if fragment_type not in get_args(FragmentType):
                    raise RuntimeError(f"unsupported structural include type '{typ}'")
                    raise SrcError(
                        src=src,
                        description=f"unsupported structural include type {typ!r}",
                        token=token,
                    )
                self._current_type.append(cast(FragmentType, fragment_type))
                token.type = 'included_' + typ
                self._process_include_args(token, args, self.INCLUDE_FRAGMENT_ALLOWED_ARGS)
                self._parse_included_blocks(token, args)
                self._process_include_args(src, token, args, self.INCLUDE_FRAGMENT_ALLOWED_ARGS)
                self._parse_included_blocks(src, token, args)
                self._current_type.pop()
        return tokens

    def _process_include_args(self, token: Token, args: dict[str, str], allowed: set[str]) -> None:
    def _process_include_args(self, src: str, token: Token, args: dict[str, str], allowed: set[str]) -> None:
        ns = self.INCLUDE_ARGS_NS + ":"
        args = { k[len(ns):]: v for k, v in args.items() if k.startswith(ns) }
        if unknown := set(args.keys()) - allowed:
            assert token.map
            raise RuntimeError(f"unrecognized include argument in line {token.map[0] + 1}", unknown)
            raise SrcError(
                src=src,
                description=f"unrecognized include argument(s): {unknown}",
                token=token,
            )
        token.meta['include-args'] = args

    def _parse_included_blocks(self, token: Token, block_args: dict[str, str]) -> None:
    def _parse_included_blocks(self, src: str, token: Token, block_args: dict[str, str]) -> None:
        assert token.map
        included = token.meta['included'] = []
        for (lnum, line) in enumerate(token.content.splitlines(), token.map[0] + 2):
        for (lnum, line) in enumerate(token.content.splitlines(), token.map[0] + 1):
            line = line.strip()
            path = self._base_paths[-1].parent / line
            if path in self._base_paths:
                raise RuntimeError(f"circular include found in line {lnum}")
                raise SrcError(
                    src=src,
                    description="circular include found",
                    token=token,
                )
            try:
                self._base_paths.append(path)
                with open(path, 'r') as f:
@@ -130,29 +146,57 @@ class BaseConverter(Converter[md.TR], Generic[md.TR]):
                    included.append((tokens, path))
                self._base_paths.pop()
            except Exception as e:
                raise RuntimeError(f"processing included file {path} from line {lnum}") from e
                raise SrcError(
                    src=src,
                    description=f"processing included file {path}",
                    token=lnum,
                ) from e

    def _parse_options(self, token: Token, block_args: dict[str, str]) -> None:
    def _parse_options(self, src: str, token: Token, block_args: dict[str, str]) -> None:
        assert token.map

        items = {}
        for (lnum, line) in enumerate(token.content.splitlines(), token.map[0] + 2):
        for (lnum, line) in enumerate(token.content.splitlines(), token.map[0] + 1):
            if len(args := line.split(":", 1)) != 2:
                raise RuntimeError(f"options directive with no argument in line {lnum}")
                raise SrcError(
                    src=src,
                    description=f"options directive with no argument",
                    tokens={
                        "Directive": lnum,
                        "Block": token,
                    },
                )
            (k, v) = (args[0].strip(), args[1].strip())
            if k in items:
                raise RuntimeError(f"duplicate options directive {k} in line {lnum}")
                raise SrcError(
                    src=src,
                    description=f"duplicate options directive {k!r}",
                    tokens={
                        "Directive": lnum,
                        "Block": token,
                    },
                )
            items[k] = v

        try:
            id_prefix = items.pop('id-prefix')
            varlist_id = items.pop('list-id')
            source = items.pop('source')
        except KeyError as e:
            raise RuntimeError(f"options directive {e} missing in block at line {token.map[0] + 1}")
            raise SrcError(
                src=src,
                description=f"options directive {e} missing",
                tokens={
                    "Block": token,
                },
            ) from e

        if items.keys():
            raise RuntimeError(
                f"unsupported options directives in block at line {token.map[0] + 1}",
                " ".join(items.keys()))
            raise SrcError(
                src=src,
                description=f"unsupported options directives: {set(items.keys())}",
                token=token,
            )

        try:
            with open(self._base_paths[-1].parent / source, 'r') as f:
@@ -160,7 +204,11 @@ class BaseConverter(Converter[md.TR], Generic[md.TR]):
                token.meta['list-id'] = varlist_id
                token.meta['source'] = json.load(f)
        except Exception as e:
            raise RuntimeError(f"processing options block in line {token.map[0] + 1}") from e
            raise SrcError(
                src=src,
                description="processing options block",
                token=token,
            ) from e

class RendererMixin(Renderer):
    _toplevel_tag: str
@@ -542,14 +590,26 @@ class HTMLConverter(BaseConverter[ManualHTMLRenderer]):
                continue
            assert token.map
            if len(token.meta['included']) == 0:
                raise RuntimeError(f"redirection target {into} in line {token.map[0] + 1} is empty!")
                raise SrcError(
                    src=src,
                    description=f"redirection target {into!r} is empty!",
                    token=token,
                )
            # we use blender-style //path to denote paths relative to the origin file
            # (usually index.html). this makes everything a lot easier and clearer.
            if not into.startswith("//") or '/' in into[2:]:
                raise RuntimeError("html:into-file must be a relative-to-origin //filename", into)
                raise SrcError(
                    src=src,
                    description=f"html:into-file must be a relative-to-origin //filename: {into}",
                    token=token,
                )
            into = token.meta['include-args']['into-file'] = into[2:]
            if into in self._redirection_targets:
                raise RuntimeError(f"redirection target {into} in line {token.map[0] + 1} is already in use")
                raise SrcError(
                    src=src,
                    description=f"redirection target {into} is already in use",
                    token=token,
                )
            self._redirection_targets.add(into)
        return tokens

+47 −28
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@ from typing import cast, get_args, Iterable, Literal, Sequence
from markdown_it.token import Token

from .utils import Freezeable
from .src_error import SrcError

# FragmentType is used to restrict structural include blocks.
FragmentType = Literal['preface', 'part', 'chapter', 'section', 'appendix']
@@ -21,37 +22,45 @@ def is_include(token: Token) -> bool:

# toplevel file must contain only the title headings and includes, anything else
# would cause strange rendering.
def _check_book_structure(tokens: Sequence[Token]) -> None:
def _check_book_structure(src: str, tokens: Sequence[Token]) -> None:
    for token in tokens[6:]:
        if not is_include(token):
            assert token.map
            raise RuntimeError(f"unexpected content in line {token.map[0] + 1}, "
                               "expected structural include")
            raise SrcError(
                src=src,
                description=f"unexpected content; expected structural include",
                token=token,
            )

# much like books, parts may not contain headings other than their title heading.
# this is a limitation of the current renderers and TOC generators that do not handle
# this case well even though it is supported in docbook (and probably supportable
# anywhere else).
def _check_part_structure(tokens: Sequence[Token]) -> None:
    _check_fragment_structure(tokens)
def _check_part_structure(src: str,tokens: Sequence[Token]) -> None:
    _check_fragment_structure(src, tokens)
    for token in tokens[3:]:
        if token.type == 'heading_open':
            assert token.map
            raise RuntimeError(f"unexpected heading in line {token.map[0] + 1}")
            raise SrcError(
                src=src,
                description="unexpected heading",
                token=token,
            )

# two include blocks must either be adjacent or separated by a heading, otherwise
# we cannot generate a correct TOC (since there'd be nothing to link to between
# the two includes).
def _check_fragment_structure(tokens: Sequence[Token]) -> None:
def _check_fragment_structure(src: str, tokens: Sequence[Token]) -> None:
    for i, token in enumerate(tokens):
        if is_include(token) \
           and i + 1 < len(tokens) \
           and not (is_include(tokens[i + 1]) or tokens[i + 1].type == 'heading_open'):
            assert token.map
            raise RuntimeError(f"unexpected content in line {token.map[0] + 1}, "
                               "expected heading or structural include")
            raise SrcError(
                src=src,
                description="unexpected content; expected heading or structural include",
                token=token,
            )

def check_structure(kind: TocEntryType, tokens: Sequence[Token]) -> None:
def check_structure(src: str, kind: TocEntryType, tokens: Sequence[Token]) -> None:
    wanted = { 'h1': 'title' }
    wanted |= { 'h2': 'subtitle' } if kind == 'book' else {}
    for (i, (tag, role)) in enumerate(wanted.items()):
@@ -59,17 +68,21 @@ def check_structure(kind: TocEntryType, tokens: Sequence[Token]) -> None:
            raise RuntimeError(f"missing {role} ({tag}) heading")
        token = tokens[3 * i]
        if token.type != 'heading_open' or token.tag != tag:
            assert token.map
            raise RuntimeError(f"expected {role} ({tag}) heading in line {token.map[0] + 1}", token)
            raise SrcError(
                src=src,
                description=f"expected {role} ({tag}) heading",
                token=token,
            )
    for t in tokens[3 * len(wanted):]:
        if t.type != 'heading_open' or not (role := wanted.get(t.tag, '')):
            continue
        assert t.map
        raise RuntimeError(
            f"only one {role} heading ({t.markup} [text...]) allowed per "
            f"{kind}, but found a second in line {t.map[0] + 1}. "
            "please remove all such headings except the first or demote the subsequent headings.",
            t)
        raise SrcError(
            src=src,
            description=f"only one {role} heading ({t.markup} [text...]) allowed per "
            f"{kind}, but found a second. "
            "Please remove all such headings except the first or demote the subsequent headings.",
            token=t,
        )

    last_heading_level = 0
    for token in tokens:
@@ -80,22 +93,28 @@ def check_structure(kind: TocEntryType, tokens: Sequence[Token]) -> None:
        # every other headings needs one too. we need this to build a TOC and to
        # provide stable links if the manual changes shape.
        if 'id' not in token.attrs and (kind != 'book' or token.tag != 'h2'):
            assert token.map
            raise RuntimeError(f"heading in line {token.map[0] + 1} does not have an id")
            raise SrcError(
                src=src,
                description=f"heading does not have an id",
                token=token,
            )

        level = int(token.tag[1:]) # because tag = h1..h6
        if level > last_heading_level + 1:
            assert token.map
            raise RuntimeError(f"heading in line {token.map[0] + 1} skips one or more heading levels, "
                               "which is currently not allowed")
            raise SrcError(
                src=src,
                description=f"heading skips one or more heading levels, "
                               "which is currently not allowed",
                token=token,
            )
        last_heading_level = level

    if kind == 'book':
        _check_book_structure(tokens)
        _check_book_structure(src, tokens)
    elif kind == 'part':
        _check_part_structure(tokens)
        _check_part_structure(src, tokens)
    else:
        _check_fragment_structure(tokens)
        _check_fragment_structure(src, tokens)

@dc.dataclass(frozen=True)
class XrefTarget:
+29 −9
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@ import dataclasses
import re

from .types import RenderFn
from .src_error import SrcError

import markdown_it
from markdown_it.token import Token
@@ -446,12 +447,16 @@ def _footnote_ids(md: markdown_it.MarkdownIt) -> None:
       generate here are derived from the footnote label, making numeric footnote
       labels invalid.
    """
    def generate_ids(tokens: Sequence[Token]) -> None:
    def generate_ids(src: str, tokens: Sequence[Token]) -> None:
        for token in tokens:
            if token.type == 'footnote_open':
                if token.meta["label"][:1].isdigit():
                    assert token.map
                    raise RuntimeError(f"invalid footnote label in line {token.map[0] + 1}")
                    raise SrcError(
                        src=src,
                        description="invalid footnote label",
                        token=token,
                    )
                token.attrs['id'] = token.meta["label"]
            elif token.type == 'footnote_anchor':
                token.meta['target'] = f'{token.meta["label"]}.__back.{token.meta["subId"]}'
@@ -460,10 +465,10 @@ def _footnote_ids(md: markdown_it.MarkdownIt) -> None:
                token.meta['target'] = token.meta["label"]
            elif token.type == 'inline':
                assert token.children is not None
                generate_ids(token.children)
                generate_ids(src, token.children)

    def footnote_ids(state: markdown_it.rules_core.StateCore) -> None:
        generate_ids(state.tokens)
        generate_ids(state.src, state.tokens)

    md.core.ruler.after("footnote_tail", "footnote_ids", footnote_ids)

@@ -537,7 +542,7 @@ def _block_titles(block: str) -> Callable[[markdown_it.MarkdownIt], None]:
    non-title heading since those would make toc generation extremely complicated.
    """
    def block_titles(state: markdown_it.rules_core.StateCore) -> None:
        in_example = [False]
        in_example = [None]
        for i, token in enumerate(state.tokens):
            if token.type == open:
                if state.tokens[i + 1].type == 'heading_open':
@@ -545,14 +550,29 @@ def _block_titles(block: str) -> Callable[[markdown_it.MarkdownIt], None]:
                    state.tokens[i + 1].type = title_open
                    state.tokens[i + 3].type = title_close
                else:
                    assert token.map
                    raise RuntimeError(f"found {block} without title in line {token.map[0] + 1}")
                in_example.append(True)
                    raise SrcError(
                        src=state.src,
                        description=f"found {block} without title",
                        token=token,
                    )
                in_example.append(token)
            elif token.type == close:
                in_example.pop()
            elif token.type == 'heading_open' and in_example[-1]:
                assert token.map
                raise RuntimeError(f"unexpected non-title heading in {block} in line {token.map[0] + 1}")
                started_at = in_example[-1]

                block_display = ":::{." + block + "}"

                raise SrcError(
                    description=f"unexpected non-title heading in `{block_display}`; are you missing a `:::`?\n"
                        f"Note: blocks like `{block_display}` are only allowed to contain a single heading in order to simplify TOC generation.",
                    src=state.src,
                    tokens={
                        f"`{block_display}` block": started_at,
                        "Unexpected heading": token,
                    },
                )

    def do_add(md: markdown_it.MarkdownIt) -> None:
        md.core.ruler.push(f"{block}_titles", block_titles)
+154 −0
Original line number Diff line number Diff line
from typing import Tuple

from markdown_it.token import Token

# A source location, in one of three shapes: a single 0-indexed line number,
# a half-open `(start, end)` line range, or a markdown-it `Token` (which
# carries its own `map` span — possibly `None`).
LineSpan = int | Tuple[int, int] | Token


class SrcError(Exception):
    """An error associated with a source file and location.

    The rendered message embeds excerpts of the offending source lines (with
    ANSI highlighting), so errors in generated Markdown point at real text
    instead of just a line number.
    """

    def __init__(
        self,
        *,
        description: str,
        src: str,
        tokens: dict[str, LineSpan] | None = None,
        token: LineSpan | None = None,
    ):
        """Create a new `SrcError`.

        Arguments:
        - `description`: A description of the error.

        - `src`: The source text the `token`s are from.

        - `tokens`: A dictionary from descriptions to `Tokens` (or lines) associated with
          the error.

          The tokens are used for their source location.

          A location like ` at lines 6-9` will be added to the description.

          If the description is empty, the location will be described as `At
          lines 6-9`.

        - `token`: Shorthand for `tokens={"": token}`.
        """
        self.src = src

        # Copy the caller's dict so adding the shorthand entry below never
        # mutates an argument the caller may reuse.
        tokens = dict(tokens) if tokens else {}
        # Compare against None, not truthiness: line 0 (the first line, as an
        # int) is falsy but is still a perfectly valid location.
        if token is not None:
            tokens[""] = token
        self.tokens = tokens

        self.description = description

        # Render the message eagerly: Exception wants its message available
        # immediately (e.g. for `str()`/`repr()` before any handler runs).
        self.message = _src_error_str(src=src, tokens=tokens, description=description)

        super().__init__(self.message)

    def __str__(self) -> str:
        return self.message


def _get_line_span(location: LineSpan) -> Tuple[int, int] | None:
    """Normalize `location` into a half-open `(start, end)` line pair.

    A `Token` contributes its `map` span (or `None` when it has no map), a
    bare line number becomes the single-line span `(n, n + 1)`, and an
    explicit tuple is passed through unchanged.
    """
    if isinstance(location, Token):
        return (location.map[0], location.map[1]) if location.map else None
    if isinstance(location, int):
        return (location, location + 1)
    return location


def _src_error_str(*, src: str, tokens: dict[str, LineSpan], description: str) -> str:
    """Render the full error message for a `SrcError`.

    Python exceptions are a bit goofy and need a `message` string attribute
    right away, so we basically need a way to generate the string before we
    actually finish `__init__`.

    The result is `description` followed, for each labeled location, by a
    highlighted "<label> at line(s) N[-M]:" header and a source excerpt.
    """

    result = [description]

    src_lines = src.splitlines()

    # The loop variable is named `label` (not `description`) so it does not
    # shadow the `description` parameter used above.
    for label, token in tokens.items():
        result.append("\n\n\x1b[33m")

        if label:
            result.append(label)
            result.append(" at ")
        else:
            result.append("At ")

        maybe_span = _get_line_span(token)

        if not maybe_span:
            # Token without a `map`: we have nothing to point at.
            result.append("unknown location\x1b[0m")
            continue

        start, end = maybe_span
        # Note: `end` is exclusive, so single-line spans are represented as
        # `(n, n+1)`.
        if start == end - 1:
            result.append("line ")
            result.append(str(start + 1))
        else:
            result.append("lines ")
            result.append(str(start + 1))
            result.append("-")
            result.append(str(end))

        result.append(":\x1b[0m\n")

        result.append(src_excerpt(src_lines=src_lines, start=start, end=end))

    return "".join(result)


def src_excerpt(
    *, src_lines: list[str], start: int, end: int, context: int = 3, max_lines: int = 20
) -> str:
    """Format an excerpt of `src_lines` highlighting the half-open span
    `[start, end)`.

    Highlighted lines get a bold yellow `┃` gutter; up to `context` lines
    before and after get a dimmed `┆` gutter. Spans longer than `max_lines`
    are elided in the middle with a `...` marker. Line numbers are rendered
    1-based. Returns the excerpt as a single newline-joined string.
    """
    total = len(src_lines)
    rendered: list[str] = []

    def emit(index: int, highlighted: bool) -> None:
        # Gutter: dimmed line number, then either a bold yellow bar for
        # highlighted lines or a dotted bar for (dimmed) context lines.
        gutter = "\x1b[2m\x1b[37m" + format(index + 1, " 4d") + "\x1b[0m"
        if highlighted:
            gutter += " \x1b[1m\x1b[33m┃\x1b[0m "
        else:
            # No reset after the bar: context lines stay dimmed throughout.
            gutter += " \x1b[2m\x1b[37m┆ "
        rendered.append(gutter + src_lines[index] + "\x1b[0m")

    def emit_range(lo: int, hi: int, highlighted: bool) -> None:
        # Clamp both ends into [0, total] so callers can over-ask freely.
        for index in range(max(0, min(total, lo)), max(0, min(total, hi))):
            emit(index, highlighted)

    if end - start <= max_lines:
        emit_range(start - context, start, False)
        emit_range(start, end, True)
        emit_range(end, end + context, False)
    else:
        # Long span: show the first and last `max_lines // 2` lines with a
        # `...` marker in between.
        half = max_lines // 2
        emit_range(start - context, start, False)
        emit_range(start, start + half, True)
        rendered.append("     \x1b[2m\x1b[37m...\x1b[0m")
        emit_range(end - half, end, True)
        emit_range(end, end + context, False)

    return "\n".join(rendered)
+3 −4
Original line number Diff line number Diff line
@@ -3,7 +3,6 @@ from pathlib import Path
from markdown_it.token import Token
from nixos_render_docs.manual import HTMLConverter, HTMLParameters
from nixos_render_docs.md import Converter
from nixos_render_docs.redirects import Redirects

auto_id_prefix="TEST_PREFIX"
def set_prefix(token: Token, ident: str) -> None:
@@ -19,7 +18,7 @@ def test_auto_id_prefix_simple() -> None:
## subtitle
    """
    tokens = Converter()._parse(src)
    md._handle_headings(tokens, on_heading=set_prefix)
    md._handle_headings(tokens, src=src, on_heading=set_prefix)

    assert [
        {**token.attrs, "tag": token.tag}
@@ -44,7 +43,7 @@ def test_auto_id_prefix_repeated() -> None:
## subtitle2
    """
    tokens = Converter()._parse(src)
    md._handle_headings(tokens, on_heading=set_prefix)
    md._handle_headings(tokens, src=src, on_heading=set_prefix)

    assert [
        {**token.attrs, "tag": token.tag}
@@ -76,7 +75,7 @@ def test_auto_id_prefix_maximum_nested() -> None:
## h2.2
    """
    tokens = Converter()._parse(src)
    md._handle_headings(tokens, on_heading=set_prefix)
    md._handle_headings(tokens, src=src, on_heading=set_prefix)

    assert [
        {**token.attrs, "tag": token.tag}
Loading