Commit 5ce06257 authored by Andy Boughton's avatar Andy Boughton
Browse files

Changes to link text validation per #58 and #90. Validate link text in...

Changes to link text validation per #58 and #90. Validate link text in index.md (always), or only in headings (reference.md and instructors.md)

Demote msg level when http links skipped

Strip anchor from filenames in error messages
parent 40c84e19
Loading
Loading
Loading
Loading
+79 −34
Original line number Diff line number Diff line
@@ -10,7 +10,6 @@ Contains validators for several kinds of template.

Call at command line with flag -h to see options and usage instructions.
"""
from __future__ import print_function

import argparse
import glob
@@ -203,20 +202,17 @@ class MarkdownValidator(object):
        return (len(missing_headings) == 0) and \
               valid_order and no_extra and correct_level

    def _validate_one_link(self, link_node):
        """Logic to validate a single external asset (image or link)

    # Link validation methods
    def _validate_one_html_link(self, link_node, check_text=False):
        """
        Any local html file being linked was generated as part of the lesson.
        Therefore, file links (.html) must have a Markdown file
            in the expected folder.

        The title of the linked Markdown document should match the link text.

        For other assets (links or images), just verify that a file exists
        """
        dest, link_text = self.ast.get_link_info(link_node)

        if re.match(r"^[\w,\s-]+\.(html?)", dest, re.IGNORECASE):
        # HTML files in same folder are made from Markdown; special tests
        fn = dest.split("#")[0]  # Split anchor name from filename
        expected_md_fn = os.path.splitext(fn)[0] + os.extsep + "md"
@@ -227,9 +223,10 @@ class MarkdownValidator(object):
                "In {0}: "
                "The document links to {1}, but could not find "
                "the expected markdown file {2}".format(
                        self.filename, dest, expected_md_path))
                    self.filename, fn, expected_md_path))
            return False

        if check_text is True:
            # If file exists, parse and validate link text = node title
            with open(expected_md_path, 'rU') as link_dest_file:
                dest_contents = link_dest_file.read()
@@ -247,36 +244,67 @@ class MarkdownValidator(object):
                        self.filename, dest,
                        link_text, dest_page_title))
                return False
        elif not re.match(r"^((https?|ftp)://)", dest, re.IGNORECASE)\
        return True

    def _validate_one_link(self, link_node, check_text=False):
        """Logic to validate a single link to a file asset

        Performs special checks for links to a local markdown file.

        For links or images, just verify that a file exists.
        """
        dest, link_text = self.ast.get_link_info(link_node)

        if re.match(r"^[\w,\s-]+\.(html?)", dest, re.IGNORECASE):
            # Validate local html links have matching md file
            return self._validate_one_html_link(link_node,
                                                check_text=check_text)
        elif not re.match(r"^((https?|ftp)://.+)", dest, re.IGNORECASE)\
                and not re.match(r"^#.*", dest):
            # If not web URL, and not anchor on same page, then
            #  verify that local file exists
            dest_path = os.path.join(self.lesson_dir, dest)
            dest_path = dest_path.split("#")[0]  # Split anchor from filename
            if not os.path.isfile(dest_path):
                fn = dest.split("#")[0]  # Split anchor name from filename
                logging.error(
                    "In {0}: "
                    "Could not find the linked asset file "
                    "{1} in {2}. If this is a URL, it must be "
                    "prefixed with http(s):// or ftp://.".format(
                        self.filename, dest, dest_path))
                        self.filename, fn, dest_path))
                return False
        else:
            logging.warning(
            logging.debug(
                "In {0}: "
                "Skipped validation of link {1}".format(self.filename, dest))
        return True

    def _validate_links(self, links_to_skip=()):
    def _partition_links(self):
        """Fetch links in document. If this template has special requirements
        for link text (eg only some links' text should match dest page title),
        filter the list accordingly.

        Default behavior: don't check the text of any links"""
        check_text = []
        no_check_text = self.ast.find_external_links()

        return check_text, no_check_text

    def _validate_links(self):
        """Validate all references to external content

        This includes links AND images: these are the two types of node that
        CommonMark assigns a .destination property"""
        links = self.ast.find_external_links()
        check_text, no_check_text = self._partition_links()

        valid = True
        for link_node in links:
            if link_node.destination not in links_to_skip:
                res = self._validate_one_link(link_node)
        for link_node in check_text:
            res = self._validate_one_link(link_node, check_text=True)
            valid = valid and res

        for link_node in no_check_text:
            res = self._validate_one_link(link_node, check_text=False)
            valid = valid and res
        return valid

@@ -309,6 +337,11 @@ class IndexPageValidator(MarkdownValidator):
    DOC_HEADERS = {'layout': vh.is_str,
                   'title': vh.is_str}

    def _partition_links(self):
        """Check the text of every link in index.md"""
        check_text = self.ast.find_external_links()
        return check_text, []

    def _validate_intro_section(self):
        """Validate the intro section

@@ -339,12 +372,6 @@ class IndexPageValidator(MarkdownValidator):
                    self.filename))
        return intro_section and prereqs_tests

    def _validate_links(self, links_to_skip=('motivation.html',
                                             'reference.html',
                                             'discussion.html',
                                             'instructors.html')):
        return super(IndexPageValidator, self)._validate_links(links_to_skip)

    def _run_tests(self):
        parent_tests = super(IndexPageValidator, self)._run_tests()
        tests = [self._validate_intro_section()]
@@ -418,6 +445,15 @@ class ReferencePageValidator(MarkdownValidator):
                   "title": vh.is_str,
                   "subtitle": vh.is_str}

    def _partition_links(self):
        """For reference.md, only check that text of link matches
        dest page subtitle if the link is in a heading"""
        all_links = self.ast.find_external_links()
        check_text = self.ast.find_external_links(
            parent_crit=self.ast.is_heading)
        dont_check_text = [n for n in all_links if n not in check_text]
        return check_text, dont_check_text

    def _validate_glossary_entry(self, glossary_entry):
        """Validate glossary entry

@@ -491,6 +527,15 @@ class InstructorPageValidator(MarkdownValidator):
                   "title": vh.is_str,
                   "subtitle": vh.is_str}

    def _partition_links(self):
        """For instructors.md, only check that text of link matches
        dest page subtitle if the link is in a heading"""
        all_links = self.ast.find_external_links()
        check_text = self.ast.find_external_links(
            parent_crit=self.ast.is_heading)
        dont_check_text = [n for n in all_links if n not in check_text]
        return check_text, dont_check_text


class LicensePageValidator(MarkdownValidator):
    """Validate LICENSE.md: user should not edit this file"""
+46 −0
Original line number Diff line number Diff line
@@ -202,6 +202,18 @@ Paragraph of introductory material.
        self.assertFalse(validator._validate_intro_section())

    # TESTS INVOLVING LINKS TO OTHER CONTENT
    def test_should_check_text_of_all_links_in_index(self):
        """Text of every local-html link in index.md should
        match dest page title"""
        validator = self._create_validator("""
## [This link is in a heading](reference.html)
[Topic Title One](01-one.html#anchor)""")
        links = validator.ast.find_external_links()
        check_text, dont_check_text = validator._partition_links()

        self.assertEqual(len(dont_check_text), 0)
        self.assertEqual(len(check_text), 2)

    def test_file_links_validate(self):
        """Verify that all links in a sample file validate.
        Involves checking for example files; may fail on "core" branch"""
@@ -303,6 +315,18 @@ minutes: not a number
Some text""")
        self.assertFalse(validator._validate_has_no_headings())

    def test_should_not_check_text_of_links_in_topic(self):
        """Never check that text of local-html links in topic
        matches dest title """
        validator = self._create_validator("""
## [This link is in a heading](reference.html)
[Topic Title One](01-one.html#anchor)""")
        links = validator.ast.find_external_links()
        check_text, dont_check_text = validator._partition_links()

        self.assertEqual(len(dont_check_text), 2)
        self.assertEqual(len(check_text), 0)

    def test_sample_file_passes_validation(self):
        sample_validator = self.VALIDATOR(self.SAMPLE_FILE)
        res = sample_validator.validate()
@@ -385,6 +409,28 @@ class TestInstructorPage(BaseTemplateTest):
    SAMPLE_FILE = os.path.join(MARKDOWN_DIR, "instructors.md")
    VALIDATOR = check.InstructorPageValidator

    def test_should_selectively_check_text_of_links_in_topic(self):
        """Only verify that text of local-html links in topic
        matches dest title if the link is in a heading"""
        validator = self._create_validator("""
## [Reference](reference.html)

[Topic Title One](01-one.html#anchor)""")
        check_text, dont_check_text = validator._partition_links()

        self.assertEqual(len(dont_check_text), 1)
        self.assertEqual(len(check_text), 1)

    def test_link_dest_bad_while_text_ignored(self):
        validator = self._create_validator("""
[ignored text](nonexistent.html)""")
        self.assertFalse(validator._validate_links())

    def test_link_dest_good_while_text_ignored(self):
        validator = self._create_validator("""
[ignored text](01-one.html)""")
        self.assertTrue(validator._validate_links())

    def test_sample_file_passes_validation(self):
        sample_validator = self.VALIDATOR(self.SAMPLE_FILE)
        res = sample_validator.validate()
+9 −3
Original line number Diff line number Diff line
@@ -121,21 +121,27 @@ class CommonMarkHelper(object):

        return dest, link_text

    def find_external_links(self, ast_node=None):
    def find_external_links(self, ast_node=None, parent_crit=None):
        """Recursive function that locates all references to external content
         under specified node. (links or images)"""
        ast_node = ast_node or self.data
        if parent_crit is None:
            # User can optionally provide a function to filter link list
            # based on where link appears. (eg, only links in headings)
            # If no filter is provided, accept all links in that node.
            parent_crit = lambda n: True

        # Link can be node itself, or hiding in inline content
        links = [n for n in ast_node.inline_content
                 if self.is_external(n)]
                 if self.is_external(n) and parent_crit(ast_node)]

        if self.is_external(ast_node):
            links.append(ast_node)

        # Also look for links in sub-nodes
        for n in ast_node.children:
            links.extend(self.find_external_links(n))
            links.extend(self.find_external_links(n,
                                                  parent_crit=parent_crit))

        return links