Looking for missing or superfluous images (030ac5c0) · Commits · NDIP / Nova Packages / Nova Carpentry Tutorial

bin/lesson_check.py

+36 −0

Original line number	Diff line number	Diff line
		@@ -43,6 +43,9 @@ P_EPISODE_FILENAME = re.compile(r'/_episodes/(\d\d)-[-\w]+.md$')
		# Pattern to match lines ending with whitespace: one or more whitespace
		# characters immediately before the end of the string.
		P_TRAILING_WHITESPACE = re.compile(r'\s+$')

		# Pattern to match figure references in HTML: an <img ...> tag carrying a
		# double-quoted src attribute.  Group 1 captures the src value.
		# Single-quoted or unquoted src attributes are not matched.
		P_FIGURE_REFS = re.compile(r'<img[^>]+src="([^"]+)"[^>]*>')

		# What kinds of blockquotes are allowed?
		KNOWN_BLOCKQUOTES = {
		'callout',
		@@ -100,6 +103,7 @@ def main():
		for filename in docs.keys():
		checker = create_checker(args, filename, docs[filename])
		checker.check()
		check_figures(args.source_dir, args.reporter)
		args.reporter.report()


		@@ -197,6 +201,38 @@ def check_fileset(source_dir, reporter, filenames_present):
		seen)


		def check_figures(source_dir, reporter):
		    """Check that all figures are present and referenced.

		    The image files referenced from ``_includes/all_figures.html`` are
		    compared by base name with the files found in the ``fig`` directory,
		    and any difference is handed to the reporter.

		    Args:
		        source_dir: Directory containing the ``_includes`` and ``fig``
		            sub-directories.
		        reporter: Object collecting problems.  Its ``add`` method is
		            called with a location and a message; its ``check`` method
		            is called with a condition, a location (``None`` here), a
		            message format and the format argument.

		    Returns:
		        None.  Nothing further is checked when ``all_figures.html``
		        cannot be found.
		    """

		    # Get references.
		    all_figures_html = os.path.join(source_dir, '_includes', 'all_figures.html')
		    try:
		        # Read as UTF-8 explicitly so the result does not depend on the
		        # locale's default encoding.
		        with open(all_figures_html, 'r', encoding='utf-8') as reader:
		            text = reader.read()
		    except FileNotFoundError:
		        reporter.add(all_figures_html,
		                     'File not found')
		        return

		    # Only references whose path contains '/fig/' are considered, and only
		    # their base names take part in the comparison.
		    referenced = {os.path.basename(ref)
		                  for ref in P_FIGURE_REFS.findall(text)
		                  if '/fig/' in ref}

		    # Get actual files (names starting with '.' are ignored).
		    fig_dir_path = os.path.join(source_dir, 'fig')
		    try:
		        actual = {name
		                  for name in os.listdir(fig_dir_path)
		                  if not name.startswith('.')}
		    except FileNotFoundError:
		        # A missing figure directory is treated as an empty one, so every
		        # referenced image is reported as missing instead of the whole
		        # check aborting with a traceback.
		        actual = set()

		    # Report differences.
		    unexpected = actual - referenced
		    reporter.check(not unexpected,
		                   None,
		                   'Unexpected image files: {0}',
		                   ', '.join(sorted(unexpected)))
		    missing = referenced - actual
		    reporter.check(not missing,
		                   None,
		                   'Missing image files: {0}',
		                   ', '.join(sorted(missing)))


		def create_checker(args, filename, info):
		"""Create appropriate checker for file."""