Commit 030ac5c0 authored by Greg Wilson
Browse files

Looking for missing or superfluous images

parent fb178306
Loading
Loading
Loading
Loading
+36 −0
Original line number Diff line number Diff line
@@ -43,6 +43,9 @@ P_EPISODE_FILENAME = re.compile(r'/_episodes/(\d\d)-[-\w]+.md$')
# Pattern to match lines ending with whitespace: a run of one or more
# whitespace characters at the end of the text being searched.
P_TRAILING_WHITESPACE = re.compile(r'\s+$')

# Pattern to match figure references in HTML: an <img ...> tag carrying a
# double-quoted src attribute.  The single capture group is the src value
# (the text between the quotes), so findall() returns a list of src strings.
P_FIGURE_REFS = re.compile(r'<img[^>]+src="([^"]+)"[^>]*>')

# What kinds of blockquotes are allowed?
KNOWN_BLOCKQUOTES = {
    'callout',
@@ -100,6 +103,7 @@ def main():
    for filename in docs.keys():
        checker = create_checker(args, filename, docs[filename])
        checker.check()
    check_figures(args.source_dir, args.reporter)
    args.reporter.report()


@@ -197,6 +201,38 @@ def check_fileset(source_dir, reporter, filenames_present):
                   seen)


def check_figures(source_dir, reporter):
    """Check that all figures are present and referenced.

    The image files named by ``<img>`` tags in
    ``_includes/all_figures.html`` are compared with the files found in the
    ``fig`` directory, and mismatches in either direction are reported.

    Args:
        source_dir: directory under which ``_includes/all_figures.html`` and
            ``fig`` are looked up.
        reporter: message collector.  Its ``add(location, message)`` and
            ``check(condition, location, template, *args)`` methods are
            called here; presumably ``check`` records the message only when
            the condition is false -- confirm against the reporter class.

    Returns:
        None.  If ``all_figures.html`` does not exist, that is reported and
        no comparison is made.  A missing ``fig`` directory is treated as
        containing no images, so every referenced figure is reported as
        missing instead of the check aborting with an exception.
    """

    # Get references.  Only opening the file can raise FileNotFoundError, so
    # the path construction and regex work stay outside the try block.
    all_figures_html = os.path.join(source_dir, '_includes', 'all_figures.html')
    try:
        # Explicit encoding so the result does not depend on the platform's
        # default locale.
        with open(all_figures_html, 'r', encoding='utf-8') as reader:
            text = reader.read()
    except FileNotFoundError:
        reporter.add(all_figures_html,
                     'File not found')
        return

    # Keep only the base names of sources that live under a fig/ directory;
    # a set removes duplicates when one image is referenced several times.
    referenced = {os.path.split(src)[1]
                  for src in P_FIGURE_REFS.findall(text)
                  if '/fig/' in src}

    # Get actual files, skipping hidden entries such as '.gitkeep'.
    fig_dir_path = os.path.join(source_dir, 'fig')
    try:
        entries = os.listdir(fig_dir_path)
    except FileNotFoundError:
        # No figure directory at all: equivalent to an empty one.
        entries = []
    actual = {name for name in entries if not name.startswith('.')}

    # Report differences.
    unexpected = actual - referenced
    reporter.check(not unexpected,
                   None,
                   'Unexpected image files: {0}',
                   ', '.join(sorted(unexpected)))
    missing = referenced - actual
    reporter.check(not missing,
                   None,
                   'Missing image files: {0}',
                   ', '.join(sorted(missing)))


def create_checker(args, filename, info):
    """Create appropriate checker for file."""