Commit 220eedc6 authored by Raniere Silva, committed by GitHub
Browse files

Merge pull request #126 from gvwilson/consolidating-links

Checking consolidated links
parents 3d04cbbf d2710514
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -89,7 +89,7 @@ ${RMD_DST} : ${RMD_SRC}

## lesson-check     : validate lesson Markdown.
lesson-check :
	@bin/lesson_check.py -s . -p ${PARSER}
	@bin/lesson_check.py -s . -p ${PARSER} -r _includes/links.md

## lesson-check-all : validate lesson Markdown, checking line lengths and trailing whitespace.
lesson-check-all :
+78 −10
Original line number Diff line number Diff line
@@ -14,7 +14,7 @@ from optparse import OptionParser

from util import Reporter, read_markdown, load_yaml, check_unwanted_files, require, IMAGE_FILE_SUFFIX

__version__ = '0.2'
__version__ = '0.3'

# Where to look for source Markdown files.
SOURCE_DIRS = ['', '_episodes', '_extras']
@@ -48,7 +48,10 @@ P_TRAILING_WHITESPACE = re.compile(r'\s+$')
P_FIGURE_REFS = re.compile(r'<img[^>]+src="([^"]+)"[^>]*>')

# Pattern to match internally-defined Markdown links.
P_INTERNALLY_DEFINED_LINK = re.compile(r'\[[^\]]+\]\[[^\]]+\]')
P_INTERNAL_LINK_REF = re.compile(r'\[([^\]]+)\]\[([^\]]+)\]')

# Pattern to match reference links (to resolve internally-defined references).
P_INTERNAL_LINK_DEF = re.compile(r'^\[([^\]]+)\]:\s*(.+)')

# What kinds of blockquotes are allowed?
KNOWN_BLOCKQUOTES = {
@@ -103,6 +106,8 @@ def main():
    args = parse_args()
    args.reporter = Reporter()
    check_config(args.reporter, args.source_dir)
    args.references = read_references(args.reporter, args.reference_path)

    docs = read_all_markdown(args.source_dir, args.parser)
    check_fileset(args.source_dir, args.reporter, docs.keys())
    check_unwanted_files(args.source_dir, args.reporter)
@@ -110,6 +115,7 @@ def main():
        checker = create_checker(args, filename, docs[filename])
        checker.check()
    check_figures(args.source_dir, args.reporter)

    args.reporter.report()


@@ -126,6 +132,10 @@ def parse_args():
                      default=None,
                      dest='parser',
                      help='path to Markdown parser')
    parser.add_option('-r', '--references',
                      default=None,
                      dest='reference_path',
                      help='path to Markdown file of external references')
    parser.add_option('-s', '--source',
                      default=os.curdir,
                      dest='source_dir',
@@ -160,6 +170,37 @@ def check_config(reporter, source_dir):
                   '"root" not set to ".." in configuration')


def read_references(reporter, ref_path):
    """Load the shared file of Markdown link definitions.

    Each line of the file at `ref_path` is matched against
    P_INTERNAL_LINK_DEF (the `[name]: url` form).  A line that does not
    match, or whose name is empty, is passed to `require` with an error
    message; a repeated name or a repeated URL is passed to
    `reporter.check` against `ref_path`.

    Returns a dictionary {symbolic_name : URL}.  The dictionary is empty
    when `ref_path` is not set, in which case no file is opened.
    """

    references = {}
    if not ref_path:
        return references

    known_urls = set()
    with open(ref_path, 'r') as reader:
        # Count from 1 so reported positions match editor line numbers.
        for (line_num, line) in enumerate(reader, 1):
            match = P_INTERNAL_LINK_DEF.search(line)
            require(match,
                    '{0}:{1} not valid reference:\n{2}'.format(ref_path, line_num, line.rstrip()))
            # The pattern has exactly two groups: the label and its target.
            name, url = match.groups()
            require(name,
                    'Empty reference at {0}:{1}'.format(ref_path, line_num))
            reporter.check(name not in references,
                           ref_path,
                           'Duplicate reference {0} at line {1}',
                           name, line_num)
            reporter.check(url not in known_urls,
                           ref_path,
                           'Duplicate definition of URL {0} at line {1}',
                           url, line_num)
            # Later definitions overwrite earlier ones even when reported.
            references[name] = url
            known_urls.add(url)
    return references


def read_all_markdown(source_dir, parser):
    """Read source files, returning
    {path : {'metadata':yaml, 'metadata_len':N, 'text':text, 'lines':[(i, line, len)], 'doc':doc}}
@@ -274,7 +315,7 @@ class CheckBase(object):


    def check(self):
        """Run tests on metadata."""
        """Run tests."""

        self.check_metadata()
        self.check_line_lengths()
@@ -342,17 +383,16 @@ class CheckBase(object):
    def check_defined_link_references(self):
        """Check that defined links resolve in the file.

        Internally-defined links match the pattern [text][label].  If
        the label contains '{{...}}', it is hopefully a reference to
        a configuration value - we should check that, but don't right
        now.
        Internally-defined links match the pattern [text][label].
        """

        result = set()
        for node in self.find_all(self.doc, {'type' : 'text'}):
            for match in P_INTERNALLY_DEFINED_LINK.findall(node['value']):
                if '{{' not in match:
                    result.add(match)
            for match in P_INTERNAL_LINK_REF.findall(node['value']):
                text = match[0]
                link = match[1]
                if link not in self.args.references:
                    result.add('"{0}"=>"{1}"'.format(text, link))
        self.reporter.check(not result,
                            self.filename,
                            'Internally-defined links may be missing definitions: {0}',
@@ -441,6 +481,14 @@ class CheckEpisode(CheckBase):
    def __init__(self, args, filename, metadata, metadata_len, text, lines, doc):
        """Forward every argument unchanged to the parent class initializer."""
        super(CheckEpisode, self).__init__(args, filename, metadata, metadata_len, text, lines, doc)


    def check(self):
        """Run the inherited tests, then the episode-only reference-inclusion test."""

        # Inherited checks run first; the links-file check is added on top.
        super(CheckEpisode, self).check()
        self.check_reference_inclusion()


    def check_metadata(self):
        super(CheckEpisode, self).check_metadata()
        if self.metadata:
@@ -467,6 +515,26 @@ class CheckEpisode(CheckBase):
                                  name, type(self.metadata[name]), type_)


    def check_reference_inclusion(self):
        """Check that links file has been included.

        The last non-empty line of the episode must contain the file name
        (final path component) of `self.args.reference_path`; if it does
        not, a message is added to the reporter for this file.  Nothing is
        checked when no reference path was given.
        """

        if not self.args.reference_path:
            return

        # Entries of self.lines are unpacked as (index, text, length); only
        # the text is needed.  The '' default keeps last_line bound when
        # self.lines is empty, so that case reaches `require` below instead
        # of failing with an unbound local variable.
        last_line = next((text for (_, text, _) in reversed(self.lines) if text),
                         '')

        require(last_line,
                'No non-empty lines in {0}'.format(self.filename))

        include_filename = os.path.split(self.args.reference_path)[-1]
        # Substring test: the include directive only has to mention the name.
        if include_filename not in last_line:
            self.reporter.add(self.filename,
                              'episode does not include "{0}"',
                              include_filename)


class CheckReference(CheckBase):
    """Check the reference page."""