Commit 6cd77e30 authored by Andy Boughton's avatar Andy Boughton Committed by Andy Boughton
Browse files

Add validation for callout boxes

Implement template-specific callout rules. Add tests and fix bugs revealed.

Narrow def of callout: block must have heading.

Simplify way that callout counts are tracked, improve log messages, and add further unit tests of callout validation.

Ensure that callout validation is actually run
parent 0ba3dc30
Loading
Loading
Loading
Loading
+114 −39
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@ Call at command line with flag -h to see options and usage instructions.
"""

import argparse
import collections
import glob
import hashlib
import logging
@@ -31,9 +32,15 @@ class MarkdownValidator(object):
    Contains basic validation skeleton to be extended for specific page types
    """
    HEADINGS = []  # List of strings containing expected heading text

    # Callout boxes (blockquote items) have special rules.
    # Dict of tuples for each callout type: {style: (title, min, max)}
    CALLOUTS = {}

    WARN_ON_EXTRA_HEADINGS = True  # Warn when other headings are present?

    DOC_HEADERS = {}  # Rows in header section (first few lines of document).
    # Validate YAML doc headers: dict of {header text: validation_func}
    DOC_HEADERS = {}

    def __init__(self, filename=None, markdown=None):
        """Perform validation on a Markdown document.
@@ -59,6 +66,9 @@ class MarkdownValidator(object):
        ast = self._parse_markdown(self.markdown)
        self.ast = vh.CommonMarkHelper(ast)

        # Keep track of how many times callout box styles are used
        self._callout_counts = collections.Counter()

    def _parse_markdown(self, markdown):
        parser = CommonMark.DocParser()
        ast = parser.parse(markdown)
@@ -146,7 +156,6 @@ class MarkdownValidator(object):

    def _validate_section_heading_order(self, ast_node=None, headings=None):
        """Verify that section headings appear, and in the order expected"""
        # TODO: Refactor into individual tests in the future
        if ast_node is None:
            ast_node = self.ast.data
            headings = self.HEADINGS
@@ -202,6 +211,100 @@ class MarkdownValidator(object):
        return (len(missing_headings) == 0) and \
            valid_order and no_extra and correct_level

    def _validate_one_callout(self, callout_node):
        """
        Logic to validate a single callout box (defined as a blockquoted
        section that starts with a heading). Checks that:

        * First child of callout box should be a lvl 2 header with
          known title & CSS style
        * Callout box must have at least one child after the heading

        An additional test is done in another function:
        * Checks # times callout style appears in document, minc <= n <= maxc
        """
        heading_node = callout_node.children[0]
        valid_head_lvl = self.ast.is_heading(heading_node, heading_level=2)
        title, styles = self.ast.get_heading_info(heading_node)

        if not valid_head_lvl:
            logging.error("In {0}: "
                          "Callout box titled '{1}' must start with a "
                          "level 2 heading".format(self.filename, title))

        try:
            style = styles[0]
        except IndexError:
            logging.error(
                "In {0}: "
                "Callout section titled '{1}' must specify "
                "a CSS style".format(self.filename, title))
            return False

        # Track # times this style is used in any callout
        self._callout_counts[style] += 1

        # Verify style actually in callout spec
        if style not in self.CALLOUTS:
            spec_title = None
            valid_style = False
        else:
            spec_title, _, _ = self.CALLOUTS[style]
            valid_style = True

        has_children = self.ast.has_number_children(callout_node, minc=2)
        if spec_title is not None and title != spec_title:
            # Callout box must have specified heading title
            logging.error(
                "In {0}: "
                "Callout section with style '{1}' should have "
                "title '{2}'".format(self.filename, style, spec_title))
            valid_title = False
        else:
            valid_title = True

        res = (valid_style and valid_title and has_children and valid_head_lvl)
        return res

    def _validate_callouts(self):
        """
        Validate all sections that appear as callouts

        The style is a better determinant of callout than the title
        """
        callout_nodes = self.ast.get_callouts()
        callouts_valid = True

        # Validate all the callout nodes present
        for n in callout_nodes:
            res = self._validate_one_callout(n)
            callouts_valid = callouts_valid and res

        found_styles = self._callout_counts

        # Issue error if style is not present correct # times
        missing_styles = [style
                          for style, (title, minc, maxc) in self.CALLOUTS.items()
                          if not ((minc or 0) <= found_styles[style]
                                  <= (maxc or sys.maxsize))]
        unknown_styles = [k for k in found_styles if k not in self.CALLOUTS]

        for style in unknown_styles:
            logging.error("In {0}: "
                          "Found callout box with unrecognized "
                          "style '{1}'".format(self.filename, style))

        for style in missing_styles:
            minc = self.CALLOUTS[style][1]
            maxc = self.CALLOUTS[style][2]
            logging.error("In {0}: "
                          "Expected between min {1} and max {2} callout boxes "
                          "with style '{3}'".format(
                self.filename, minc, maxc, style))

        return (callouts_valid and
                len(missing_styles) == 0 and len(unknown_styles) == 0)

    # Link validation methods
    def _validate_one_html_link(self, link_node, check_text=False):
        """
@@ -316,6 +419,7 @@ class MarkdownValidator(object):
        """
        tests = [self._validate_doc_headers(),
                 self._validate_section_heading_order(),
                 self._validate_callouts(),
                 self._validate_links()]

        return all(tests)
@@ -337,6 +441,8 @@ class IndexPageValidator(MarkdownValidator):
    DOC_HEADERS = {'layout': vh.is_str,
                   'title': vh.is_str}

    CALLOUTS = {'prereq': ("Prerequisites", 1, 1)}

    def _partition_links(self):
        """Check the text of every link in index.md"""
        check_text = self.ast.find_external_links()
@@ -354,23 +460,7 @@ class IndexPageValidator(MarkdownValidator):
                "Expected paragraph of introductory text at {1}".format(
                    self.filename, intro_block.start_line))

        # Validate the prerequisites block
        prereqs_block = self.ast.get_block_titled("Prerequisites",
                                                  heading_level=2)
        if prereqs_block:
            # Found the expected block; now check contents
            prereqs_tests = self.ast.has_number_children(prereqs_block[0],
                                                         minc=2)
        else:
            prereqs_tests = False

        if prereqs_tests is False:
            logging.error(
                "In {0}: "
                "Intro should contain a blockquoted section with level 2 "
                "title 'Prerequisites'. Section should not be empty.".format(
                    self.filename))
        return intro_section and prereqs_tests
        return intro_section

    def _run_tests(self):
        parent_tests = super(IndexPageValidator, self)._run_tests()
@@ -385,23 +475,9 @@ class TopicPageValidator(MarkdownValidator):
                   "subtitle": vh.is_str,
                   "minutes": vh.is_numeric}

    # TODO: Write validator for, eg, challenge section
    def _validate_learning_objective(self):
        learn_node = self.ast.get_block_titled("Learning Objectives",
                                               heading_level=2)
        if learn_node:
            # In addition to title, the node must have some content
            node_tests = self.ast.has_number_children(learn_node[0], minc=2)
        else:
            node_tests = False

        if node_tests is False:
            logging.error(
                "In {0}: "
                "Page should contain a blockquoted section with level 2 "
                "title 'Learning Objectives'. Section should not "
                "be empty.".format(self.filename))
        return node_tests
    CALLOUTS = {"objectives": ("Learning Objectives", 1, 1),
                "callout": (None, 0, None),
                "challenge": (None, 0, None)}

    def _validate_has_no_headings(self):
        """Check headings
@@ -423,8 +499,7 @@ class TopicPageValidator(MarkdownValidator):

    def _run_tests(self):
        parent_tests = super(TopicPageValidator, self)._run_tests()
        tests = [self._validate_has_no_headings(),
                 self._validate_learning_objective()]
        tests = [self._validate_has_no_headings()]
        return all(tests) and parent_tests


+121 −8
Original line number Diff line number Diff line
@@ -188,18 +188,13 @@ Paragraph of introductory material.
        self.assertTrue(validator._validate_intro_section())

    def test_fail_when_prereq_section_has_incorrect_heading_level(self):
        validator = self._create_validator("""---
layout: lesson
title: Lesson Title
---
Paragraph of introductory material.

> # Prerequisites
        validator = self._create_validator("""
> # Prerequisites {.prereq}
>
> A short paragraph describing what learners need to know
> before tackling this lesson.
""")
        self.assertFalse(validator._validate_intro_section())
        self.assertFalse(validator._validate_callouts())

    # TESTS INVOLVING LINKS TO OTHER CONTENT
    def test_should_check_text_of_all_links_in_index(self):
@@ -291,6 +286,73 @@ SQLite uses the integers 0 and 1 for the former, and represents the latter as di
            """Use [this CSV](data.csv) for the exercise.""")
        self.assertFalse(validator._validate_links())

    ### Tests involving callout/blockquote sections
    def test_one_prereq_callout_passes(self):
        """index.md should have one, and only one, prerequisites box"""
        validator = self._create_validator("""> ## Prerequisites {.prereq}
>
> What learners need to know before tackling this lesson.
""")
        self.assertTrue(validator._validate_callouts())

    def test_two_prereq_callouts_fail(self):
        """More than one prereq callout box is not allowed"""
        validator = self._create_validator("""> ## Prerequisites {.prereq}
>
> What learners need to know before tackling this lesson.

A spacer paragraph

> ## Prerequisites {.prereq}
>
> A second prerequisites box should cause an error
""")
        self.assertFalse(validator._validate_callouts())

    def test_callout_without_style_fails(self):
        """A callout box will fail if it is missing the required style"""
        validator = self._create_validator("""> ## Prerequisites
>
> What learners need to know before tackling this lesson.
""")
        self.assertFalse(validator._validate_callouts())

    def test_callout_with_wrong_title_fails(self):
        """A callout box will fail if it has the wrong title"""
        validator = self._create_validator("""> ## Wrong title {.prereq}
>
> What learners need to know before tackling this lesson.
""")
        self.assertFalse(validator._validate_callouts())

    def test_unknown_callout_style_fails(self):
        """A callout whose style is unrecognized by template is invalid"""
        validator = self._create_validator("""> ## Any title {.callout}
>
> What learners need to know before tackling this lesson.
""")
        callout_node = validator.ast.get_callouts()[0]
        self.assertFalse(validator._validate_one_callout(callout_node))

    def test_block_ignored_sans_heading(self):
        """
        Blockquotes only count as callouts if they have a heading
        """
        validator = self._create_validator("""> Prerequisites {.prereq}
>
> What learners need to know before tackling this lesson.
""")
        callout_nodes = validator.ast.get_callouts()
        self.assertEqual(len(callout_nodes), 0)

    def test_callout_heading_must_be_l2(self):
        """Callouts will fail validation if the heading is not level 2"""
        validator = self._create_validator("""> ### Prerequisites {.prereq}
>
> What learners need to know before tackling this lesson.
""")
        self.assertFalse(validator._validate_callouts())


class TestTopicPage(BaseTemplateTest):
    """Verifies that the topic page validator works as expected"""
@@ -327,6 +389,44 @@ Some text""")
        self.assertEqual(len(dont_check_text), 2)
        self.assertEqual(len(check_text), 0)

    def test_pass_when_optional_callouts_absent(self):
        """Optional block titles should be optional"""
        validator = self._create_validator("""> ## Learning Objectives {.objectives}
>
> * All topic pages must have this callout""")
        self.assertTrue(validator._validate_callouts())


    def test_callout_style_passes_regardless_of_title(self):
        """Verify that certain kinds of callout box can be recognized solely
        by style, regardless of the heading title"""
        validator = self._create_validator("""> ## Learning Objectives {.objectives}
>
> * All topic pages must have this callout

> ## Some random title {.callout}
>
> Some informative text""")

        self.assertTrue(validator._validate_callouts())

    def test_callout_style_allows_duplicates(self):
        """Multiple blockquoted sections with style 'callout' are allowed"""
        validator = self._create_validator("""> ## Learning Objectives {.objectives}
>
> * All topic pages must have this callout

> ##  Callout box one {.callout}
>
> Some informative text

Spacer paragraph

> ## Callout box two {.callout}
>
> Further exposition""")
        self.assertTrue(validator._validate_callouts())

    def test_sample_file_passes_validation(self):
        sample_validator = self.VALIDATOR(self.SAMPLE_FILE)
        res = sample_validator.validate()
@@ -398,6 +498,19 @@ Key Word 2
""")
        self.assertTrue(validator._validate_glossary())

    def test_callout_fails_when_none_specified(self):
        """The presence of a callout box should cause validation to fail
           when the template doesn't define any recognized callouts

           (No "unknown" blockquote sections are allowed)
           """
        validator = self._create_validator("""> ## Learning Objectives {.objectives}
>
> * Learning objective 1
> * Learning objective 2""")

        self.assertFalse(validator._validate_callouts())

    def test_sample_file_passes_validation(self):
        sample_validator = self.VALIDATOR(self.SAMPLE_FILE)
        res = sample_validator.validate()
+49 −15
Original line number Diff line number Diff line
@@ -92,6 +92,8 @@ class CommonMarkHelper(object):
            in index.md

        Returns empty list if no appropriate node is found"""

        # TODO: Deprecate in favor of callout validator
        if ast_node is None:
            ast_node = self.data
        return [n for n in ast_node.children
@@ -102,24 +104,17 @@ class CommonMarkHelper(object):
                    heading_level=heading_level,
                    show_msg=False)]

    # Helpers to fetch specific document sections
    def get_section_headings(self, ast_node=None):
        """Returns a list of ast nodes that are headings"""
        if ast_node is None:
            ast_node = self.data
        return [n for n in ast_node.children if self.is_heading(n)]

    def get_link_info(self, link_node):
        """Given a link node, return the link title and destination"""
        if not self.is_external(link_node):
            raise TypeError("Cannot apply this method to something that is not a link")

        dest = link_node.destination
        try:
            link_text = link_node.label[0].c
        except:
            link_text = None

        return dest, link_text
    def get_callouts(self, ast_node=None):
        if ast_node is None:
            ast_node = self.data
        return [n for n in ast_node.children if self.is_callout(n)]

    def find_external_links(self, ast_node=None, parent_crit=None):
        """Recursive function that locates all references to external content
@@ -145,9 +140,31 @@ class CommonMarkHelper(object):

        return links

    # Helpers to get information from a specific node type
    def get_link_info(self, link_node):
        """Given a link node, return the link title and destination"""
        if not self.is_external(link_node):
            raise TypeError("Cannot apply this method to something that is not a link")

        dest = link_node.destination
        try:
            link_text = link_node.label[0].c
        except:
            link_text = None

        return dest, link_text

    def get_heading_info(self, heading_node):
        """Get heading text and list of all css styles applied"""
        heading = heading_node.strings[0]
        text = strip_attrs(heading)
        css = get_css_class(heading)
        return text, css

    # Functions to query type or content of nodes
    def has_section_heading(self, section_title, ast_node=None,
                            heading_level=2, limit=sys.maxsize, show_msg=True):
        """Does the file contain (<= x copies of) specified heading text?
        """Does the section contain (<= x copies of) specified heading text?
        Will strip off any CSS attributes when looking for the section title"""
        if ast_node is None:
            ast_node = self.data
@@ -183,9 +200,15 @@ class CommonMarkHelper(object):
        """Is the node a horizontal rule (hr)?"""
        return ast_node.t == 'HorizontalRule'

    def is_heading(self, ast_node):
    def is_heading(self, ast_node, heading_level=None):
        """Is the node a heading/ title?"""
        return ast_node.t == "ATXHeader"
        has_tag = ast_node.t == "ATXHeader"

        if heading_level is None:
            has_level = True
        else:
            has_level = (ast_node.level == heading_level)
        return has_tag and has_level

    def is_paragraph(self, ast_node):
        """Is the node a paragraph?"""
@@ -206,3 +229,14 @@ class CommonMarkHelper(object):
    def is_block(self, ast_node):
        """Is the node a BlockQuoted element?"""
        return ast_node.t == "BlockQuote"

    def is_callout(self, ast_node):
        """Composite element: "callout" elements in SWC templates are
        blockquotes whose first child element is a heading"""
        if len(ast_node.children) > 0 and \
                self.is_heading(ast_node.children[0]):
            has_heading = True
        else:
            has_heading = False

        return self.is_block(ast_node) and has_heading
 No newline at end of file