Unverified Commit d996972f authored by Stephan Lachnit's avatar Stephan Lachnit
Browse files

docs: fix internal references in the PDF

parent 69d6125c
Loading
Loading
Loading
Loading
+41 −1
Original line number Diff line number Diff line
@@ -10,6 +10,7 @@ Convert GitLab Flavored Markdown to hugo or LaTeX using pandoc.
import argparse
import functools
import os
import pathlib
import re
import subprocess
import sys
@@ -147,6 +148,24 @@ def latex_convert_hugo_alert(string: str) -> str:
    return re.sub(match, r'\\begin{hugo\2}\n\\textbf{\1}:\n\3\n\\end{hugo\2}', string, flags=re.DOTALL)


def _get_path_relative_to_markdown_tree(file_path: str) -> str:
    """
    Return the path relative to the markdown tree (assuming at most one directory depth).

    For example, `/path/to/docs/01_introduction/_index.md` becomes `01_introduction/_index.md`.

    Args:
        file_path: Path to the file.

    Returns:
        String with the relative path.
    """
    file_path_pl = pathlib.Path(file_path).resolve()
    docs_dir_pl = file_path_pl.parents[1]
    file_path_rel_pl = file_path_pl.relative_to(docs_dir_pl)
    return file_path_rel_pl.as_posix()


def latex_convert_href_references(string: str, file_path: str) -> str:
    """
    Converts the pandoc-converted Markdown references to LaTeX references.
@@ -160,7 +179,28 @@ def latex_convert_href_references(string: str, file_path: str) -> str:
    Returns:
        String formatted in LaTeX.
    """
    return string  # TODO
    # Replace hypertargets with something that include part of the file path to prevent duplicate targets
    file_label_prefix = _get_path_relative_to_markdown_tree(file_path)
    # For the first target we don't want to include the section name as we don't have that in GLFM
    string = re.sub(r'^(\\hypertarget{)([^\n]+?)(}{%\n.+?\\label{)(.+?)(}})',
                    rf'\g<1>{file_label_prefix}\g<3>{file_label_prefix}\g<5>',
                    string, count=1, flags=re.DOTALL)
    # For the following targets we do want the section name to be included
    string = re.sub(r'(.+?\\hypertarget{)([^\n]+?)(}{%\n.+?\\label{)(.+?)(}})',
                    rf'\g<1>{file_label_prefix}-\g<2>\g<3>{file_label_prefix}-\g<4>\g<5>',
                    string, flags=re.DOTALL)
    # We need to adjust references in the same file as well though
    string = re.sub(r'(\\hyperlink{)(.+?)(}{)', rf'\g<1>{file_label_prefix}-\g<2>\g<3>', string)
    # Find references to other files
    refs = re.findall(r'(\\href{)(\.\.?/.+?)(})', string)
    for ref in refs:
        ref_rel_file_path = ref[1]
        ref_full_match = ref[0] + ref[1] + ref[2]
        ref_file_path = os.path.join(os.path.dirname(file_path), ref_rel_file_path)
        ref_replace = _get_path_relative_to_markdown_tree(ref_file_path).replace('\#', '-')
        string = re.sub(re.escape(ref_full_match), rf'\\protect\\hyperlink{{{ref_replace}}}', string)

    return string


def gitlab2hugo(string: str, isindexmd: bool) -> str: