Commit 02430798 authored by abought's avatar abought
Browse files

Merge pull request #113 from abought/link_check_text

Link text validation changes
parents 40c84e19 7c92facb
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
*~
*.pyc
.DS_Store
_site
+79 −34
Original line number Diff line number Diff line
@@ -10,7 +10,6 @@ Contains validators for several kinds of template.

Call at command line with flag -h to see options and usage instructions.
"""
from __future__ import print_function

import argparse
import glob
@@ -203,20 +202,17 @@ class MarkdownValidator(object):
        return (len(missing_headings) == 0) and \
               valid_order and no_extra and correct_level

    def _validate_one_link(self, link_node):
        """Logic to validate a single external asset (image or link)

    # Link validation methods
    def _validate_one_html_link(self, link_node, check_text=False):
        """
        Any local html file being linked was generated as part of the lesson.
        Therefore, file links (.html) must have a Markdown file
            in the expected folder.

        The title of the linked Markdown document should match the link text.

        For other assets (links or images), just verify that a file exists
        """
        dest, link_text = self.ast.get_link_info(link_node)

        if re.match(r"^[\w,\s-]+\.(html?)", dest, re.IGNORECASE):
        # HTML files in same folder are made from Markdown; special tests
        fn = dest.split("#")[0]  # Split anchor name from filename
        expected_md_fn = os.path.splitext(fn)[0] + os.extsep + "md"
@@ -227,9 +223,10 @@ class MarkdownValidator(object):
                "In {0}: "
                "The document links to {1}, but could not find "
                "the expected markdown file {2}".format(
                        self.filename, dest, expected_md_path))
                    self.filename, fn, expected_md_path))
            return False

        if check_text is True:
            # If file exists, parse and validate link text = node title
            with open(expected_md_path, 'rU') as link_dest_file:
                dest_contents = link_dest_file.read()
@@ -247,36 +244,67 @@ class MarkdownValidator(object):
                        self.filename, dest,
                        link_text, dest_page_title))
                return False
        elif not re.match(r"^((https?|ftp)://)", dest, re.IGNORECASE)\
        return True

    def _validate_one_link(self, link_node, check_text=False):
        """Logic to validate a single link to a file asset

        Performs special checks for links to a local markdown file.

        For links or images, just verify that a file exists.
        """
        dest, link_text = self.ast.get_link_info(link_node)

        if re.match(r"^[\w,\s-]+\.(html?)", dest, re.IGNORECASE):
            # Validate local html links have matching md file
            return self._validate_one_html_link(link_node,
                                                check_text=check_text)
        elif not re.match(r"^((https?|ftp)://.+)", dest, re.IGNORECASE)\
                and not re.match(r"^#.*", dest):
            # If not web URL, and not anchor on same page, then
            #  verify that local file exists
            dest_path = os.path.join(self.lesson_dir, dest)
            dest_path = dest_path.split("#")[0]  # Split anchor from filename
            if not os.path.isfile(dest_path):
                fn = dest.split("#")[0]  # Split anchor name from filename
                logging.error(
                    "In {0}: "
                    "Could not find the linked asset file "
                    "{1} in {2}. If this is a URL, it must be "
                    "prefixed with http(s):// or ftp://.".format(
                        self.filename, dest, dest_path))
                        self.filename, fn, dest_path))
                return False
        else:
            logging.warning(
            logging.debug(
                "In {0}: "
                "Skipped validation of link {1}".format(self.filename, dest))
        return True

    def _validate_links(self, links_to_skip=()):
    def _partition_links(self):
        """Fetch links in document. If this template has special requirements
        for link text (eg only some links' text should match dest page title),
        filter the list accordingly.

        Default behavior: don't check the text of any links"""
        check_text = []
        no_check_text = self.ast.find_external_links()

        return check_text, no_check_text

    def _validate_links(self):
        """Validate all references to external content

        This includes links AND images: these are the two types of node that
        CommonMark assigns a .destination property"""
        links = self.ast.find_external_links()
        check_text, no_check_text = self._partition_links()

        valid = True
        for link_node in links:
            if link_node.destination not in links_to_skip:
                res = self._validate_one_link(link_node)
        for link_node in check_text:
            res = self._validate_one_link(link_node, check_text=True)
            valid = valid and res

        for link_node in no_check_text:
            res = self._validate_one_link(link_node, check_text=False)
            valid = valid and res
        return valid

@@ -309,6 +337,11 @@ class IndexPageValidator(MarkdownValidator):
    DOC_HEADERS = {'layout': vh.is_str,
                   'title': vh.is_str}

    def _partition_links(self):
        """Check the text of every link in index.md"""
        check_text = self.ast.find_external_links()
        return check_text, []

    def _validate_intro_section(self):
        """Validate the intro section

@@ -339,12 +372,6 @@ class IndexPageValidator(MarkdownValidator):
                    self.filename))
        return intro_section and prereqs_tests

    def _validate_links(self, links_to_skip=('motivation.html',
                                             'reference.html',
                                             'discussion.html',
                                             'instructors.html')):
        return super(IndexPageValidator, self)._validate_links(links_to_skip)

    def _run_tests(self):
        parent_tests = super(IndexPageValidator, self)._run_tests()
        tests = [self._validate_intro_section()]
@@ -418,6 +445,15 @@ class ReferencePageValidator(MarkdownValidator):
                   "title": vh.is_str,
                   "subtitle": vh.is_str}

    def _partition_links(self):
        """For reference.md, only check that text of link matches
        dest page subtitle if the link is in a heading"""
        all_links = self.ast.find_external_links()
        check_text = self.ast.find_external_links(
            parent_crit=self.ast.is_heading)
        dont_check_text = [n for n in all_links if n not in check_text]
        return check_text, dont_check_text

    def _validate_glossary_entry(self, glossary_entry):
        """Validate glossary entry

@@ -491,6 +527,15 @@ class InstructorPageValidator(MarkdownValidator):
                   "title": vh.is_str,
                   "subtitle": vh.is_str}

    def _partition_links(self):
        """For instructors.md, only check that text of link matches
        dest page subtitle if the link is in a heading"""
        all_links = self.ast.find_external_links()
        check_text = self.ast.find_external_links(
            parent_crit=self.ast.is_heading)
        dont_check_text = [n for n in all_links if n not in check_text]
        return check_text, dont_check_text


class LicensePageValidator(MarkdownValidator):
    """Validate LICENSE.md: user should not edit this file"""
+46 −0
Original line number Diff line number Diff line
@@ -202,6 +202,18 @@ Paragraph of introductory material.
        self.assertFalse(validator._validate_intro_section())

    # TESTS INVOLVING LINKS TO OTHER CONTENT
    def test_should_check_text_of_all_links_in_index(self):
        """Text of every local-html link in index.md should
        match dest page title"""
        validator = self._create_validator("""
## [This link is in a heading](reference.html)
[Topic Title One](01-one.html#anchor)""")
        links = validator.ast.find_external_links()
        check_text, dont_check_text = validator._partition_links()

        self.assertEqual(len(dont_check_text), 0)
        self.assertEqual(len(check_text), 2)

    def test_file_links_validate(self):
        """Verify that all links in a sample file validate.
        Involves checking for example files; may fail on "core" branch"""
@@ -303,6 +315,18 @@ minutes: not a number
Some text""")
        self.assertFalse(validator._validate_has_no_headings())

    def test_should_not_check_text_of_links_in_topic(self):
        """Never check that text of local-html links in topic
        matches dest title """
        validator = self._create_validator("""
## [This link is in a heading](reference.html)
[Topic Title One](01-one.html#anchor)""")
        links = validator.ast.find_external_links()
        check_text, dont_check_text = validator._partition_links()

        self.assertEqual(len(dont_check_text), 2)
        self.assertEqual(len(check_text), 0)

    def test_sample_file_passes_validation(self):
        sample_validator = self.VALIDATOR(self.SAMPLE_FILE)
        res = sample_validator.validate()
@@ -385,6 +409,28 @@ class TestInstructorPage(BaseTemplateTest):
    SAMPLE_FILE = os.path.join(MARKDOWN_DIR, "instructors.md")
    VALIDATOR = check.InstructorPageValidator

    def test_should_selectively_check_text_of_links_in_topic(self):
        """Only verify that text of local-html links in topic
        matches dest title if the link is in a heading"""
        validator = self._create_validator("""
## [Reference](reference.html)

[Topic Title One](01-one.html#anchor)""")
        check_text, dont_check_text = validator._partition_links()

        self.assertEqual(len(dont_check_text), 1)
        self.assertEqual(len(check_text), 1)

    def test_link_dest_bad_while_text_ignored(self):
        validator = self._create_validator("""
[ignored text](nonexistent.html)""")
        self.assertFalse(validator._validate_links())

    def test_link_dest_good_while_text_ignored(self):
        validator = self._create_validator("""
[ignored text](01-one.html)""")
        self.assertTrue(validator._validate_links())

    def test_sample_file_passes_validation(self):
        sample_validator = self.VALIDATOR(self.SAMPLE_FILE)
        res = sample_validator.validate()
+9 −3
Original line number Diff line number Diff line
@@ -121,21 +121,27 @@ class CommonMarkHelper(object):

        return dest, link_text

    def find_external_links(self, ast_node=None):
    def find_external_links(self, ast_node=None, parent_crit=None):
        """Recursive function that locates all references to external content
         under specified node. (links or images)"""
        ast_node = ast_node or self.data
        if parent_crit is None:
            # User can optionally provide a function to filter link list
            # based on where link appears. (eg, only links in headings)
            # If no filter is provided, accept all links in that node.
            parent_crit = lambda n: True

        # Link can be node itself, or hiding in inline content
        links = [n for n in ast_node.inline_content
                 if self.is_external(n)]
                 if self.is_external(n) and parent_crit(ast_node)]

        if self.is_external(ast_node):
            links.append(ast_node)

        # Also look for links in sub-nodes
        for n in ast_node.children:
            links.extend(self.find_external_links(n))
            links.extend(self.find_external_links(n,
                                                  parent_crit=parent_crit))

        return links