Commit d8753bac authored by Raniere Silva's avatar Raniere Silva
Browse files

Merge pull request #163 from swcarpentry/core

Sync gh-pages with newest core changes
parents 6fad2483 024e1a0a
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -24,6 +24,9 @@ INCLUDES = \
# Chunk options for knitr (used in R conversion).
R_CHUNK_OPTS = tools/chunk-options.R

# Ensure that intermediate (generated) Markdown files from R are kept.
.SECONDARY: $(DST_RMD)

# Default action is to show what commands are available.
all : commands

+119 −41
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@ Call at command line with flag -h to see options and usage instructions.
"""

import argparse
import collections
import glob
import hashlib
import logging
@@ -31,9 +32,15 @@ class MarkdownValidator(object):
    Contains basic validation skeleton to be extended for specific page types
    """
    HEADINGS = []  # List of strings containing expected heading text

    # Callout boxes (blockquote items) have special rules.
    # Dict of tuples for each callout type: {style: (title, min, max)}
    CALLOUTS = {}

    WARN_ON_EXTRA_HEADINGS = True  # Warn when other headings are present?

    DOC_HEADERS = {}  # Rows in header section (first few lines of document).
    # Validate YAML doc headers: dict of {header text: validation_func}
    DOC_HEADERS = {}

    def __init__(self, filename=None, markdown=None):
        """Perform validation on a Markdown document.
@@ -59,6 +66,9 @@ class MarkdownValidator(object):
        ast = self._parse_markdown(self.markdown)
        self.ast = vh.CommonMarkHelper(ast)

        # Keep track of how many times callout box styles are used
        self._callout_counts = collections.Counter()

    def _parse_markdown(self, markdown):
        parser = CommonMark.DocParser()
        ast = parser.parse(markdown)
@@ -146,7 +156,6 @@ class MarkdownValidator(object):

    def _validate_section_heading_order(self, ast_node=None, headings=None):
        """Verify that section headings appear, and in the order expected"""
        # TODO: Refactor into individual tests in the future
        if ast_node is None:
            ast_node = self.ast.data
            headings = self.HEADINGS
@@ -202,6 +211,100 @@ class MarkdownValidator(object):
        return (len(missing_headings) == 0) and \
            valid_order and no_extra and correct_level

    def _validate_one_callout(self, callout_node):
        """
        Logic to validate a single callout box (defined as a blockquoted
        section that starts with a heading). Checks that:

        * First child of callout box should be a lvl 2 header with
          known title & CSS style
        * Callout box must have at least one child after the heading

        An additional test is done in another function:
        * Checks # times callout style appears in document, minc <= n <= maxc
        """
        heading_node = callout_node.children[0]
        valid_head_lvl = self.ast.is_heading(heading_node, heading_level=2)
        title, styles = self.ast.get_heading_info(heading_node)

        if not valid_head_lvl:
            logging.error("In {0}: "
                          "Callout box titled '{1}' must start with a "
                          "level 2 heading".format(self.filename, title))

        try:
            style = styles[0]
        except IndexError:
            logging.error(
                "In {0}: "
                "Callout section titled '{1}' must specify "
                "a CSS style".format(self.filename, title))
            return False

        # Track # times this style is used in any callout
        self._callout_counts[style] += 1

        # Verify style actually in callout spec
        if style not in self.CALLOUTS:
            spec_title = None
            valid_style = False
        else:
            spec_title, _, _ = self.CALLOUTS[style]
            valid_style = True

        has_children = self.ast.has_number_children(callout_node, minc=2)
        if spec_title is not None and title != spec_title:
            # Callout box must have specified heading title
            logging.error(
                "In {0}: "
                "Callout section with style '{1}' should have "
                "title '{2}'".format(self.filename, style, spec_title))
            valid_title = False
        else:
            valid_title = True

        res = (valid_style and valid_title and has_children and valid_head_lvl)
        return res

    def _validate_callouts(self):
        """
        Validate all sections that appear as callouts

        The style is a better determinant of callout than the title
        """
        callout_nodes = self.ast.get_callouts()
        callouts_valid = True

        # Validate all the callout nodes present
        for n in callout_nodes:
            res = self._validate_one_callout(n)
            callouts_valid = callouts_valid and res

        found_styles = self._callout_counts

        # Issue error if style is not present correct # times
        missing_styles = [style
                          for style, (title, minc, maxc) in self.CALLOUTS.items()
                          if not ((minc or 0) <= found_styles[style]
                                  <= (maxc or sys.maxsize))]
        unknown_styles = [k for k in found_styles if k not in self.CALLOUTS]

        for style in unknown_styles:
            logging.error("In {0}: "
                          "Found callout box with unrecognized "
                          "style '{1}'".format(self.filename, style))

        for style in missing_styles:
            minc = self.CALLOUTS[style][1]
            maxc = self.CALLOUTS[style][2]
            logging.error("In {0}: "
                          "Expected between min {1} and max {2} callout boxes "
                          "with style '{3}'".format(
                self.filename, minc, maxc, style))

        return (callouts_valid and
                len(missing_styles) == 0 and len(unknown_styles) == 0)

    # Link validation methods
    def _validate_one_html_link(self, link_node, check_text=False):
        """
@@ -259,9 +362,10 @@ class MarkdownValidator(object):
            # Validate local html links have matching md file
            return self._validate_one_html_link(link_node,
                                                check_text=check_text)
        elif not re.match(r"^((https?|ftp)://.+)", dest, re.IGNORECASE)\
        elif not re.match(r"^((https?|ftp)://.+)|mailto:",
                          dest, re.IGNORECASE)\
                and not re.match(r"^#.*", dest):
            # If not web URL, and not anchor on same page, then
            # If not web or email URL, and not anchor on same page, then
            #  verify that local file exists
            dest_path = os.path.join(self.lesson_dir, dest)
            dest_path = dest_path.split("#")[0]  # Split anchor from filename
@@ -316,6 +420,7 @@ class MarkdownValidator(object):
        """
        tests = [self._validate_doc_headers(),
                 self._validate_section_heading_order(),
                 self._validate_callouts(),
                 self._validate_links()]

        return all(tests)
@@ -337,6 +442,8 @@ class IndexPageValidator(MarkdownValidator):
    DOC_HEADERS = {'layout': vh.is_str,
                   'title': vh.is_str}

    CALLOUTS = {'prereq': ("Prerequisites", 1, 1)}

    def _partition_links(self):
        """Check the text of every link in index.md"""
        check_text = self.ast.find_external_links()
@@ -354,23 +461,7 @@ class IndexPageValidator(MarkdownValidator):
                "Expected paragraph of introductory text at {1}".format(
                    self.filename, intro_block.start_line))

        # Validate the prerequisites block
        prereqs_block = self.ast.get_block_titled("Prerequisites",
                                                  heading_level=2)
        if prereqs_block:
            # Found the expected block; now check contents
            prereqs_tests = self.ast.has_number_children(prereqs_block[0],
                                                         minc=2)
        else:
            prereqs_tests = False

        if prereqs_tests is False:
            logging.error(
                "In {0}: "
                "Intro should contain a blockquoted section with level 2 "
                "title 'Prerequisites'. Section should not be empty.".format(
                    self.filename))
        return intro_section and prereqs_tests
        return intro_section

    def _run_tests(self):
        parent_tests = super(IndexPageValidator, self)._run_tests()
@@ -385,23 +476,9 @@ class TopicPageValidator(MarkdownValidator):
                   "subtitle": vh.is_str,
                   "minutes": vh.is_numeric}

    # TODO: Write validator for, eg, challenge section
    def _validate_learning_objective(self):
        learn_node = self.ast.get_block_titled("Learning Objectives",
                                               heading_level=2)
        if learn_node:
            # In addition to title, the node must have some content
            node_tests = self.ast.has_number_children(learn_node[0], minc=2)
        else:
            node_tests = False

        if node_tests is False:
            logging.error(
                "In {0}: "
                "Page should contain a blockquoted section with level 2 "
                "title 'Learning Objectives'. Section should not "
                "be empty.".format(self.filename))
        return node_tests
    CALLOUTS = {"objectives": ("Learning Objectives", 1, 1),
                "callout": (None, 0, None),
                "challenge": (None, 0, None)}

    def _validate_has_no_headings(self):
        """Check headings
@@ -423,13 +500,14 @@ class TopicPageValidator(MarkdownValidator):

    def _run_tests(self):
        parent_tests = super(TopicPageValidator, self)._run_tests()
        tests = [self._validate_has_no_headings(),
                 self._validate_learning_objective()]
        tests = [self._validate_has_no_headings()]
        return all(tests) and parent_tests


class MotivationPageValidator(MarkdownValidator):
    """Validate motivation.md"""
    WARN_ON_EXTRA_HEADINGS = False

    DOC_HEADERS = {"layout": vh.is_str,
                   "title": vh.is_str,
                   "subtitle": vh.is_str}
+1 −1
Original line number Diff line number Diff line
@@ -6,7 +6,7 @@

library("knitr")
opts_chunk$set(tidy = FALSE, results = "markup", comment = NA,
               fig.align = "center")
               fig.align = "center", fig.path = "fig/")

# The hooks below add html tags to the code chunks and their output so that they
# are properly formatted when the site is built with jekyll.
+121 −8
Original line number Diff line number Diff line
@@ -188,18 +188,13 @@ Paragraph of introductory material.
        self.assertTrue(validator._validate_intro_section())

    def test_fail_when_prereq_section_has_incorrect_heading_level(self):
        validator = self._create_validator("""---
layout: lesson
title: Lesson Title
---
Paragraph of introductory material.

> # Prerequisites
        validator = self._create_validator("""
> # Prerequisites {.prereq}
>
> A short paragraph describing what learners need to know
> before tackling this lesson.
""")
        self.assertFalse(validator._validate_intro_section())
        self.assertFalse(validator._validate_callouts())

    # TESTS INVOLVING LINKS TO OTHER CONTENT
    def test_should_check_text_of_all_links_in_index(self):
@@ -291,6 +286,73 @@ SQLite uses the integers 0 and 1 for the former, and represents the latter as di
            """Use [this CSV](data.csv) for the exercise.""")
        self.assertFalse(validator._validate_links())

    ### Tests involving callout/blockquote sections
    def test_one_prereq_callout_passes(self):
        """index.md should have one, and only one, prerequisites box"""
        validator = self._create_validator("""> ## Prerequisites {.prereq}
>
> What learners need to know before tackling this lesson.
""")
        self.assertTrue(validator._validate_callouts())

    def test_two_prereq_callouts_fail(self):
        """More than one prereq callout box is not allowed"""
        validator = self._create_validator("""> ## Prerequisites {.prereq}
>
> What learners need to know before tackling this lesson.

A spacer paragraph

> ## Prerequisites {.prereq}
>
> A second prerequisites box should cause an error
""")
        self.assertFalse(validator._validate_callouts())

    def test_callout_without_style_fails(self):
        """A callout box will fail if it is missing the required style"""
        validator = self._create_validator("""> ## Prerequisites
>
> What learners need to know before tackling this lesson.
""")
        self.assertFalse(validator._validate_callouts())

    def test_callout_with_wrong_title_fails(self):
        """A callout box will fail if it has the wrong title"""
        validator = self._create_validator("""> ## Wrong title {.prereq}
>
> What learners need to know before tackling this lesson.
""")
        self.assertFalse(validator._validate_callouts())

    def test_unknown_callout_style_fails(self):
        """A callout whose style is unrecognized by template is invalid"""
        validator = self._create_validator("""> ## Any title {.callout}
>
> What learners need to know before tackling this lesson.
""")
        callout_node = validator.ast.get_callouts()[0]
        self.assertFalse(validator._validate_one_callout(callout_node))

    def test_block_ignored_sans_heading(self):
        """
        Blockquotes only count as callouts if they have a heading
        """
        validator = self._create_validator("""> Prerequisites {.prereq}
>
> What learners need to know before tackling this lesson.
""")
        callout_nodes = validator.ast.get_callouts()
        self.assertEqual(len(callout_nodes), 0)

    def test_callout_heading_must_be_l2(self):
        """Callouts will fail validation if the heading is not level 2"""
        validator = self._create_validator("""> ### Prerequisites {.prereq}
>
> What learners need to know before tackling this lesson.
""")
        self.assertFalse(validator._validate_callouts())


class TestTopicPage(BaseTemplateTest):
    """Verifies that the topic page validator works as expected"""
@@ -327,6 +389,44 @@ Some text""")
        self.assertEqual(len(dont_check_text), 2)
        self.assertEqual(len(check_text), 0)

    def test_pass_when_optional_callouts_absent(self):
        """Optional block titles should be optional"""
        validator = self._create_validator("""> ## Learning Objectives {.objectives}
>
> * All topic pages must have this callout""")
        self.assertTrue(validator._validate_callouts())


    def test_callout_style_passes_regardless_of_title(self):
        """Verify that certain kinds of callout box can be recognized solely
        by style, regardless of the heading title"""
        validator = self._create_validator("""> ## Learning Objectives {.objectives}
>
> * All topic pages must have this callout

> ## Some random title {.callout}
>
> Some informative text""")

        self.assertTrue(validator._validate_callouts())

    def test_callout_style_allows_duplicates(self):
        """Multiple blockquoted sections with style 'callout' are allowed"""
        validator = self._create_validator("""> ## Learning Objectives {.objectives}
>
> * All topic pages must have this callout

> ##  Callout box one {.callout}
>
> Some informative text

Spacer paragraph

> ## Callout box two {.callout}
>
> Further exposition""")
        self.assertTrue(validator._validate_callouts())

    def test_sample_file_passes_validation(self):
        sample_validator = self.VALIDATOR(self.SAMPLE_FILE)
        res = sample_validator.validate()
@@ -398,6 +498,19 @@ Key Word 2
""")
        self.assertTrue(validator._validate_glossary())

    def test_callout_fails_when_none_specified(self):
        """The presence of a callout box should cause validation to fail
           when the template doesn't define any recognized callouts

           (No "unknown" blockquote sections are allowed)
           """
        validator = self._create_validator("""> ## Learning Objectives {.objectives}
>
> * Learning objective 1
> * Learning objective 2""")

        self.assertFalse(validator._validate_callouts())

    def test_sample_file_passes_validation(self):
        sample_validator = self.VALIDATOR(self.SAMPLE_FILE)
        res = sample_validator.validate()
+49 −15
Original line number Diff line number Diff line
@@ -92,6 +92,8 @@ class CommonMarkHelper(object):
            in index.md

        Returns empty list if no appropriate node is found"""

        # TODO: Deprecate in favor of callout validator
        if ast_node is None:
            ast_node = self.data
        return [n for n in ast_node.children
@@ -102,24 +104,17 @@ class CommonMarkHelper(object):
                    heading_level=heading_level,
                    show_msg=False)]

    # Helpers to fetch specific document sections
    def get_section_headings(self, ast_node=None):
        """Returns a list of ast nodes that are headings"""
        if ast_node is None:
            ast_node = self.data
        return [n for n in ast_node.children if self.is_heading(n)]

    def get_link_info(self, link_node):
        """Given a link node, return the link title and destination"""
        if not self.is_external(link_node):
            raise TypeError("Cannot apply this method to something that is not a link")

        dest = link_node.destination
        try:
            link_text = link_node.label[0].c
        except:
            link_text = None

        return dest, link_text
    def get_callouts(self, ast_node=None):
        if ast_node is None:
            ast_node = self.data
        return [n for n in ast_node.children if self.is_callout(n)]

    def find_external_links(self, ast_node=None, parent_crit=None):
        """Recursive function that locates all references to external content
@@ -145,9 +140,31 @@ class CommonMarkHelper(object):

        return links

    # Helpers to get information from a specific node type
    def get_link_info(self, link_node):
        """Given a link node, return the link title and destination"""
        if not self.is_external(link_node):
            raise TypeError("Cannot apply this method to something that is not a link")

        dest = link_node.destination
        try:
            link_text = link_node.label[0].c
        except:
            link_text = None

        return dest, link_text

    def get_heading_info(self, heading_node):
        """Get heading text and list of all css styles applied"""
        heading = heading_node.strings[0]
        text = strip_attrs(heading)
        css = get_css_class(heading)
        return text, css

    # Functions to query type or content of nodes
    def has_section_heading(self, section_title, ast_node=None,
                            heading_level=2, limit=sys.maxsize, show_msg=True):
        """Does the file contain (<= x copies of) specified heading text?
        """Does the section contain (<= x copies of) specified heading text?
        Will strip off any CSS attributes when looking for the section title"""
        if ast_node is None:
            ast_node = self.data
@@ -183,9 +200,15 @@ class CommonMarkHelper(object):
        """Is the node a horizontal rule (hr)?"""
        return ast_node.t == 'HorizontalRule'

    def is_heading(self, ast_node):
    def is_heading(self, ast_node, heading_level=None):
        """Is the node a heading/ title?"""
        return ast_node.t == "ATXHeader"
        has_tag = ast_node.t == "ATXHeader"

        if heading_level is None:
            has_level = True
        else:
            has_level = (ast_node.level == heading_level)
        return has_tag and has_level

    def is_paragraph(self, ast_node):
        """Is the node a paragraph?"""
@@ -206,3 +229,14 @@ class CommonMarkHelper(object):
    def is_block(self, ast_node):
        """Is the node a BlockQuoted element?"""
        return ast_node.t == "BlockQuote"

    def is_callout(self, ast_node):
        """Composite element: "callout" elements in SWC templates are
        blockquotes whose first child element is a heading"""
        if len(ast_node.children) > 0 and \
                self.is_heading(ast_node.children[0]):
            has_heading = True
        else:
            has_heading = False

        return self.is_block(ast_node) and has_heading
 No newline at end of file