Commit e3df3f5b authored by Vacaliuc, Bogdan
Browse files

md2pdf: substitute ${BL} in page header from Instrument metadata



The CSS `content: "${BL} — Motion Control"` line was rendering literally
because the raw Python string doesn't do substitution and CSS has no
variable syntax of its own. Extract the BL token from the document's
`**Instrument:**` metadata line (handling both 'BL4B' and
'BL4B (Liquids Reflectometer)' forms) and do a plain string replace on
the CSS before passing it to WeasyPrint — simplest thing that works
given the CSS body is full of '{', '}', and '$' characters that would
trip up str.format / string.Template.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
parent 618dda22
Loading
Loading
Loading
Loading
+25 −1
Original line number Diff line number Diff line
@@ -679,6 +679,24 @@ HTML_DOC = """<!DOCTYPE html>
"""


def _extract_beamline(metadata: list[tuple[str, str]]) -> str:
    """Return the short beamline identifier (e.g. 'BL4B') extracted from the
    document's `**Instrument:**` metadata line.

    Handles both terse values (`BL4B`) and descriptive ones
    (`BL4B (Liquids Reflectometer)`) by picking out the first `BL\\w+` token.
    Returns the whole stripped value if no such token is present, and an
    empty string if there is no Instrument entry at all — in which case the
    `${BL}` CSS placeholder renders as a lone em-dash and the running
    header still looks sensible.
    """
    for label, value in metadata:
        if label.strip().lower() == "instrument":
            m = re.search(r"BL\w+", value)
            return m.group(0) if m else value.strip()
    return ""


def _render_metadata_table(metadata: list[tuple[str, str]]) -> str:
    if not metadata:
        return ""
@@ -724,10 +742,16 @@ def convert(md_path: Path, pdf_path: Path) -> None:
    toc_html = _build_toc(body_html)
    metadata_html = _render_metadata_table(metadata)

    # Substitute `${BL}` placeholders in the CSS with the beamline identifier
    # from the document's Instrument metadata. Plain `str.replace` is used
    # (not str.format / string.Template) because the CSS body is full of `{}`
    # and `$` characters that would collide with richer templating schemes.
    css = CSS.replace("${BL}", _extract_beamline(metadata))

    full_html = HTML_DOC.format(
        title_text=title,
        title_html=title_html,
        css=CSS,
        css=css,
        metadata_html=metadata_html,
        toc_html=toc_html,
        body_html=body_html,