Loading bin/lesson_check.py +13 −33 Original line number Diff line number Diff line Loading @@ -8,7 +8,6 @@ import sys import os import glob import json import yaml import re from optparse import OptionParser Loading Loading @@ -38,12 +37,6 @@ REQUIRED_FILES = { '%/setup.md': True, } # Required non-Markdown files. NON_MARKDOWN_FILES = { "AUTHORS", "CITATION" } # Episode filename pattern. P_EPISODE_FILENAME = re.compile(r'/_episodes/(\d\d)-[-\w]+.md$') Loading Loading @@ -97,10 +90,9 @@ def main(): """Main driver.""" args = parse_args() args.reporter = Reporter(args) check_config(args) check_non_markdown_files(args.source_dir, args.reporter) docs = read_all_markdown(args, args.source_dir) args.reporter = Reporter() check_config(args.reporter, args.source_dir) docs = read_all_markdown(args.source_dir, args.parser) check_fileset(args.source_dir, args.reporter, docs.keys()) for filename in docs.keys(): checker = create_checker(args, filename, docs[filename]) Loading Loading @@ -134,27 +126,15 @@ def parse_args(): return args def check_config(args): def check_config(reporter, source_dir): """Check configuration file.""" config_file = os.path.join(args.source_dir, '_config.yml') with open(config_file, 'r') as reader: config = yaml.load(reader) args.reporter.check_field(config_file, 'configuration', config, 'kind', 'lesson') config_file = os.path.join(source_dir, '_config.yml') config = load_yaml(config_file) reporter.check_field(config_file, 'configuration', config, 'kind', 'lesson') def check_non_markdown_files(source_dir, reporter): """Check presence of non-Markdown files.""" for filename in NON_MARKDOWN_FILES: path = os.path.join(source_dir, filename) reporter.check(os.path.exists(path), filename, "File not found") def read_all_markdown(args, source_dir): def read_all_markdown(source_dir, parser): """Read source files, returning {path : {'metadata':yaml, 'metadata_len':N, 'text':text, 'lines':[(i, line, len)], 'doc':doc}} """ Loading @@ -164,7 +144,7 @@ def read_all_markdown(args, source_dir): result = {} for pat in all_patterns: for filename in glob.glob(pat): data = read_markdown(args.parser, filename) data = read_markdown(parser, filename) if data: result[filename] = data return result Loading Loading @@ -239,6 +219,7 @@ class CheckBase(object): self.text = text self.lines = lines self.doc = doc self.layout = None Loading Loading @@ -371,7 +352,6 @@ class CheckEpisode(CheckBase): def __init__(self, args, filename, metadata, metadata_len, text, lines, doc): super(CheckEpisode, self).__init__(args, filename, metadata, metadata_len, text, lines, doc) def check_metadata(self): super(CheckEpisode, self).check_metadata() if self.metadata: Loading bin/util.py +43 −16 Original line number Diff line number Diff line import sys import json import yaml from subprocess import Popen, PIPE try: import yaml except ImportError: print('Unable to import YAML module: please install PyYAML', file=sys.stderr) sys.exit(1) class Reporter(object): """Collect and report errors.""" def __init__(self, args): def __init__(self): """Constructor.""" super(Reporter, self).__init__() Loading Loading @@ -62,23 +66,13 @@ def read_markdown(parser, path): """ # Split and extract YAML (if present). metadata = None metadata_len = None with open(path, 'r') as reader: body = reader.read() pieces = body.split('---', 2) if len(pieces) == 3: try: metadata = yaml.load(pieces[1]) except yaml.YAMLError as e: print('Unable to parse YAML header in {0}:\n{1}'.format(path, e)) sys.exit(1) metadata_len = pieces[1].count('\n') body = pieces[2] metadata_raw, metadata_yaml, body = split_metadata(path, body) # Split into lines. offset = 0 if metadata_len is None else metadata_len lines = [(offset+i+1, l, len(l)) for (i, l) in enumerate(body.split('\n'))] metadata_len = 0 if metadata_raw is None else metadata_raw.count('\n') lines = [(metadata_len+i+1, line, len(line)) for (i, line) in enumerate(body.split('\n'))] # Parse Markdown. cmd = 'ruby {0}'.format(parser) Loading @@ -87,9 +81,42 @@ def read_markdown(parser, path): doc = json.loads(stdout_data) return { 'metadata': metadata, 'metadata': metadata_yaml, 'metadata_len': metadata_len, 'text': body, 'lines': lines, 'doc': doc } def split_metadata(path, text): """ Get raw (text) metadata, metadata as YAML, and rest of body. If no metadata, return (None, None, body). """ metadata_raw = None metadata_yaml = None metadata_len = None pieces = text.split('---', 2) if len(pieces) == 3: metadata_raw = pieces[1] text = pieces[2] try: metadata_yaml = yaml.load(metadata_raw) except yaml.YAMLError as e: print('Unable to parse YAML header in {0}:\n{1}'.format(path, e), file=sys.stderr) sys.exit(1) return metadata_raw, metadata_yaml, text def load_yaml(filename): """ Wrapper around YAML loading so that 'import yaml' and error handling is only needed in one place. """ with open(filename, 'r') as reader: return yaml.load(reader) bin/workshop_check.py +126 −155 Original line number Diff line number Diff line Loading @@ -7,37 +7,21 @@ docstrings on the checking functions for a summary of the checks. import sys import os import re import logging import yaml from collections import Counter __version__ = '0.6' # basic logging configuration logger = logging.getLogger(__name__) verbosity = logging.INFO # severity of at least INFO will emerge logger.setLevel(verbosity) # create console handler and set level to debug console_handler = logging.StreamHandler() console_handler.setLevel(verbosity) formatter = logging.Formatter('%(levelname)s: %(message)s') console_handler.setFormatter(formatter) logger.addHandler(console_handler) from datetime import date from util import Reporter, split_metadata # TODO: these regexp patterns need comments inside # Metadata field patterns. EMAIL_PATTERN = r'[^@]+@[^@]+\.[^@]+' HUMANTIME_PATTERN = r'((0?[1-9]|1[0-2]):[0-5]\d(am|pm)(-|to)(0?[1-9]|1[0-2]):[0-5]\d(am|pm))|((0?\d|1\d|2[0-3]):[0-5]\d(-|to)(0?\d|1\d|2[0-3]):[0-5]\d)' EVENTBRITE_PATTERN = r'\d{9,10}' URL_PATTERN = r'https?://.+' # Defaults. CARPENTRIES = ("dc", "swc") DEFAULT_CONTACT_EMAIL = 'admin@software-carpentry.org' USAGE = 'Usage: "check-workshop path/to/index.html"\n' USAGE = 'Usage: "check-workshop path/to/root/directory"' # Country and language codes. Note that codes mean different things: 'ar' # is 'Arabic' as a language but 'Argentina' as a country. Loading Loading @@ -86,18 +70,9 @@ ISO_LANGUAGE = [ ] def add_error(msg, errors): """Add error to the list of errors.""" errors.append(msg) def add_suberror(msg, errors): """Add sub error, ie. error indented by 1 level ("\t"), to the list of errors.""" errors.append("\t{0}".format(msg)) def look_for_fixme(func): '''Decorator to fail test if text argument starts with "FIXME".''' """Decorator to fail test if text argument starts with "FIXME".""" def inner(arg): if (arg is not None) and \ isinstance(arg, str) and \ Loading Loading @@ -137,24 +112,26 @@ def check_language(language): @look_for_fixme def check_humandate(date): '''"humandate" must be a human-readable date with a 3-letter month and 4-digit year. Examples include "Feb 18-20, 2025" and "Feb 18 and 20, 2025". It may be in languages other than English, but the """ 'humandate' must be a human-readable date with a 3-letter month and 4-digit year. Examples include 'Feb 18-20, 2025' and 'Feb 18 and 20, 2025'. It may be in languages other than English, but the month name should be kept short to aid formatting of the main Software Carpentry web site.''' Software Carpentry web site. """ if "," not in date: if ',' not in date: return False month_dates, year = date.split(",") month_dates, year = date.split(',') # The first three characters of month_dates are not empty month = month_dates[:3] if any(char == " " for char in month): if any(char == ' ' for char in month): return False # But the fourth character is empty ("February" is illegal) if month_dates[3] != " ": if month_dates[3] != ' ': return False # year contains *only* numbers Loading @@ -168,65 +145,80 @@ def check_humandate(date): @look_for_fixme def check_humantime(time): '''"humantime" is a human-readable start and end time for the workshop, such as "09:00 - 16:00".''' """ 'humantime' is a human-readable start and end time for the workshop, such as '09:00 - 16:00'. """ return bool(re.match(HUMANTIME_PATTERN, time.replace(" ", ""))) return bool(re.match(HUMANTIME_PATTERN, time.replace(' ', ''))) def check_date(this_date): '''"startdate" and "enddate" are machine-readable start and end dates for the workshop, and must be in YYYY-MM-DD format, e.g., "2015-07-01".''' """ 'startdate' and 'enddate' are machine-readable start and end dates for the workshop, and must be in YYYY-MM-DD format, e.g., '2015-07-01'. """ from datetime import date # yaml automatically loads valid dates as datetime.date # YAML automatically loads valid dates as datetime.date. return isinstance(this_date, date) @look_for_fixme def check_latitude_longitude(latlng): '''"latlng" must be a valid latitude and longitude represented as two floating-point numbers separated by a comma.''' """ 'latlng' must be a valid latitude and longitude represented as two floating-point numbers separated by a comma. """ try: lat, lng = latlng.split(',') lat = float(lat) long = float(lng) return (-90.0 <= lat <= 90.0) and (-180.0 <= long <= 180.0) except ValueError: return False return (-90.0 <= lat <= 90.0) and (-180.0 <= long <= 180.0) def check_instructors(instructors): '''"instructor" must be a non-empty comma-separated list of quoted names, e.g. ['First name', 'Second name', ...']. Do not use "TBD" or other placeholders.''' """ 'instructor' must be a non-empty comma-separated list of quoted names, e.g. ['First name', 'Second name', ...']. Do not use 'TBD' or other placeholders. """ # yaml automatically loads list-like strings as lists # YAML automatically loads list-like strings as lists. return isinstance(instructors, list) and len(instructors) > 0 def check_helpers(helpers): '''"helper" must be a comma-separated list of quoted names, e.g. ['First name', 'Second name', ...']. The list may be empty. Do not use "TBD" or other placeholders.''' """ 'helper' must be a comma-separated list of quoted names, e.g. ['First name', 'Second name', ...']. The list may be empty. Do not use 'TBD' or other placeholders. """ # yaml automatically loads list-like strings as lists # YAML automatically loads list-like strings as lists. return isinstance(helpers, list) and len(helpers) >= 0 @look_for_fixme def check_email(email): '''"contact" must be a valid email address consisting of characters, a @, and more characters. It should not be the default contact email address "admin@software-carpentry.org".''' """ 'contact' must be a valid email address consisting of characters, an '@', and more characters. It should not be the default contact email address 'admin@software-carpentry.org'. """ return bool(re.match(EMAIL_PATTERN, email)) and \ (email != DEFAULT_CONTACT_EMAIL) def check_eventbrite(eventbrite): '''"eventbrite" (the Eventbrite registration key) must be 9 or more digits.''' """ 'eventbrite' (the Eventbrite registration key) must be 9 or more digits. It may appear as an integer or as a string. """ if isinstance(eventbrite, int): return True Loading @@ -236,15 +228,19 @@ def check_eventbrite(eventbrite): @look_for_fixme def check_etherpad(etherpad): '''"etherpad" must be a valid URL.''' """ 'etherpad' must be a valid URL. """ return bool(re.match(URL_PATTERN, etherpad)) @look_for_fixme def check_pass(value): '''This test always passes (it is used for "checking" things like addresses, for which no sensible validation is feasible).''' """ This test always passes (it is used for 'checking' things like the workshop address, for which no sensible validation is feasible). """ return True Loading @@ -265,38 +261,38 @@ HANDLERS = { 'humandate': (True, check_humandate, 'humandate invalid. Please use three-letter months like ' + '"Jan" and four-letter years like "2025".'), '"Jan" and four-letter years like "2025"'), 'humantime': (True, check_humantime, 'humantime doesn\'t include numbers'), 'startdate': (True, check_date, 'startdate invalid. Must be of format year-month-day, ' + 'i.e., 2014-01-31.'), 'i.e., 2014-01-31'), 'enddate': (False, check_date, 'enddate invalid. Must be of format year-month-day, i.e.,' + ' 2014-01-31.'), ' 2014-01-31'), 'latlng': (True, check_latitude_longitude, 'latlng invalid. Check that it is two floating point ' + 'numbers, separated by a comma.'), 'numbers, separated by a comma'), 'instructor': (True, check_instructors, 'instructor list isn\'t a valid list of format ' + '["First instructor", "Second instructor",..].'), '["First instructor", "Second instructor",..]'), 'helper': (True, check_helpers, 'helper list isn\'t a valid list of format ' + '["First helper", "Second helper",..].'), '["First helper", "Second helper",..]'), 'contact': (True, check_email, 'contact email invalid or still set to ' + '"{0}".'.format(DEFAULT_CONTACT_EMAIL)), 'eventbrite': (False, check_eventbrite, 'Eventbrite key appears invalid.'), 'eventbrite': (False, check_eventbrite, 'Eventbrite key appears invalid'), 'etherpad': (False, check_etherpad, 'Etherpad URL appears invalid.'), 'etherpad': (False, check_etherpad, 'Etherpad URL appears invalid'), 'venue': (False, check_pass, 'venue name not specified'), Loading @@ -310,102 +306,85 @@ REQUIRED = set([k for k in HANDLERS if HANDLERS[k][0]]) OPTIONAL = set([k for k in HANDLERS if not HANDLERS[k][0]]) def check_validity(data, function, errors, error_msg): '''Wrapper-function around the various check-functions.''' valid = function(data) if not valid: add_error(error_msg, errors) add_suberror('Offending entry is: "{0}"'.format(data), errors) return valid def check_blank_lines(reporter, raw): """ Blank lines are not allowed in category headers. """ def check_blank_lines(raw_data, errors, error_msg): '''Blank lines are not allowed in category headers.''' lines = [x.strip() for x in raw_data.split('\n')] if '' in lines: add_error(error_msg, errors) add_suberror('{0} blank lines found in header'.format(lines.count('')), errors) return False return True def check_categories(left, right, errors, error_msg): '''Report set difference of categories.''' result = left - right if result: add_error(error_msg, errors) add_suberror('Offending entries: {0}'.format(result), errors) return False return True lines = [(i, x) for (i, x) in enumerate(raw.strip().split('\n')) if not x.strip()] reporter.check(not lines, None, 'Blank line(s) in header: {0}', ', '.join(["{0}: {1}".format(i, x.rstrip()) for (i, x) in lines])) def get_header(text): '''Extract YAML header from raw data, returning (None, None) if no valid header found and (raw, parsed) if header found.''' def check_categories(reporter, left, right, msg): """ Report differences (if any) between two sets of categories. """ # YAML header must be right at the start of the file. if not text.startswith('---'): return None, None diff = left - right reporter.check(len(diff) == 0, None, '{0}: offending entries {1}', msg, sorted(list(diff))) # YAML header must start and end with '---' pieces = text.split('---') if len(pieces) < 3: return None, None # Return raw text and YAML-ized form. raw = pieces[1].strip() return raw, yaml.load(raw) def check_file(reporter, path, data): """ Get header from file, call all other functions, and check file for validity. """ def check_file(filename, data, errors): '''Get header from file, call all other functions and check file for validity. Return list of errors (empty when no errors).''' raw, header = get_header(data) if header is None: msg = ('Cannot find YAML header in given file "{0}".'.format(filename)) add_error(msg, errors) return errors # Get metadata as text and as YAML. raw, header, body = split_metadata(path, data) # Do we have any blank lines in the header? is_valid = check_blank_lines(raw, errors, 'There are blank lines in the header') check_blank_lines(reporter, raw) # Look through all header entries. If the category is in the input # file and is either required or we have actual data (as opposed to # a commented-out entry), we check it. If it *isn't* in the header # but is required, report an error. for category in HANDLERS: required, handler_function, error_message = HANDLERS[category] required, handler, message = HANDLERS[category] if category in header: if required or header[category]: is_valid &= check_validity(header[category], handler_function, errors, error_message) reporter.check(handler(header[category]), None, '{0}\n actual value "{1}"', message, header[category]) elif required: msg = 'index file is missing mandatory key "{0}"'.format(category) add_error(msg, errors) is_valid = False reporter.add(None, 'Missing mandatory key "{0}"', category) # Check whether we have missing or too many categories seen_categories = set(header.keys()) check_categories(reporter, REQUIRED, seen_categories, 'Missing categories') check_categories(reporter, seen_categories, REQUIRED.union(OPTIONAL), 'Superfluous categories') is_valid &= check_categories(REQUIRED, seen_categories, errors, 'There are missing categories') is_valid &= check_categories(seen_categories, REQUIRED.union(OPTIONAL), errors, 'There are superfluous categories') def check_config(reporter, filename): """ Check YAML configuration file. """ def check_config(filename, errors): '''Check YAML configuration file.''' config = load_yaml(filename) with open(filename, 'r') as reader: config = yaml.load(reader) kind = config.get('kind', None) reporter.check(kind == 'workshop', filename, 'Missing or unknown kind of event: {0}', kind) if config['kind'] != 'workshop': msg = 'Not configured as a workshop: found "{0}" instead'.format(config['kind']) add_error(msg, errors) carpentry = config.get('carpentry', None) reporter.check(carpentry in ('swc', 'dc'), filename, 'Missing or unknown carpentry: {0}', carpentry) def main(): Loading @@ -418,21 +397,13 @@ def main(): root_dir = sys.argv[1] index_file = os.path.join(root_dir, 'index.html') config_file = os.path.join(root_dir, '_config.yml') logger.info('Testing "{0}" and "{1}"'.format(index_file, config_file)) errors = [] check_config(config_file, errors) reporter = Reporter() check_config(reporter, config_file) with open(index_file) as reader: data = reader.read() check_file(index_file, data, errors) if errors: for m in errors: logger.error(m) sys.exit(1) else: logger.info('Everything seems to be in order') sys.exit(0) check_file(reporter, index_file, data) reporter.report() if __name__ == '__main__': Loading Loading
bin/lesson_check.py +13 −33 Original line number Diff line number Diff line Loading @@ -8,7 +8,6 @@ import sys import os import glob import json import yaml import re from optparse import OptionParser Loading Loading @@ -38,12 +37,6 @@ REQUIRED_FILES = { '%/setup.md': True, } # Required non-Markdown files. NON_MARKDOWN_FILES = { "AUTHORS", "CITATION" } # Episode filename pattern. P_EPISODE_FILENAME = re.compile(r'/_episodes/(\d\d)-[-\w]+.md$') Loading Loading @@ -97,10 +90,9 @@ def main(): """Main driver.""" args = parse_args() args.reporter = Reporter(args) check_config(args) check_non_markdown_files(args.source_dir, args.reporter) docs = read_all_markdown(args, args.source_dir) args.reporter = Reporter() check_config(args.reporter, args.source_dir) docs = read_all_markdown(args.source_dir, args.parser) check_fileset(args.source_dir, args.reporter, docs.keys()) for filename in docs.keys(): checker = create_checker(args, filename, docs[filename]) Loading Loading @@ -134,27 +126,15 @@ def parse_args(): return args def check_config(args): def check_config(reporter, source_dir): """Check configuration file.""" config_file = os.path.join(args.source_dir, '_config.yml') with open(config_file, 'r') as reader: config = yaml.load(reader) args.reporter.check_field(config_file, 'configuration', config, 'kind', 'lesson') config_file = os.path.join(source_dir, '_config.yml') config = load_yaml(config_file) reporter.check_field(config_file, 'configuration', config, 'kind', 'lesson') def check_non_markdown_files(source_dir, reporter): """Check presence of non-Markdown files.""" for filename in NON_MARKDOWN_FILES: path = os.path.join(source_dir, filename) reporter.check(os.path.exists(path), filename, "File not found") def read_all_markdown(args, source_dir): def read_all_markdown(source_dir, parser): """Read source files, returning {path : {'metadata':yaml, 'metadata_len':N, 'text':text, 'lines':[(i, line, len)], 'doc':doc}} """ Loading @@ -164,7 +144,7 @@ def read_all_markdown(args, source_dir): result = {} for pat in all_patterns: for filename in glob.glob(pat): data = read_markdown(args.parser, filename) data = read_markdown(parser, filename) if data: result[filename] = data return result Loading Loading @@ -239,6 +219,7 @@ class CheckBase(object): self.text = text self.lines = lines self.doc = doc self.layout = None Loading Loading @@ -371,7 +352,6 @@ class CheckEpisode(CheckBase): def __init__(self, args, filename, metadata, metadata_len, text, lines, doc): super(CheckEpisode, self).__init__(args, filename, metadata, metadata_len, text, lines, doc) def check_metadata(self): super(CheckEpisode, self).check_metadata() if self.metadata: Loading
bin/util.py +43 −16 Original line number Diff line number Diff line import sys import json import yaml from subprocess import Popen, PIPE try: import yaml except ImportError: print('Unable to import YAML module: please install PyYAML', file=sys.stderr) sys.exit(1) class Reporter(object): """Collect and report errors.""" def __init__(self, args): def __init__(self): """Constructor.""" super(Reporter, self).__init__() Loading Loading @@ -62,23 +66,13 @@ def read_markdown(parser, path): """ # Split and extract YAML (if present). metadata = None metadata_len = None with open(path, 'r') as reader: body = reader.read() pieces = body.split('---', 2) if len(pieces) == 3: try: metadata = yaml.load(pieces[1]) except yaml.YAMLError as e: print('Unable to parse YAML header in {0}:\n{1}'.format(path, e)) sys.exit(1) metadata_len = pieces[1].count('\n') body = pieces[2] metadata_raw, metadata_yaml, body = split_metadata(path, body) # Split into lines. offset = 0 if metadata_len is None else metadata_len lines = [(offset+i+1, l, len(l)) for (i, l) in enumerate(body.split('\n'))] metadata_len = 0 if metadata_raw is None else metadata_raw.count('\n') lines = [(metadata_len+i+1, line, len(line)) for (i, line) in enumerate(body.split('\n'))] # Parse Markdown. cmd = 'ruby {0}'.format(parser) Loading @@ -87,9 +81,42 @@ def read_markdown(parser, path): doc = json.loads(stdout_data) return { 'metadata': metadata, 'metadata': metadata_yaml, 'metadata_len': metadata_len, 'text': body, 'lines': lines, 'doc': doc } def split_metadata(path, text): """ Get raw (text) metadata, metadata as YAML, and rest of body. If no metadata, return (None, None, body). """ metadata_raw = None metadata_yaml = None metadata_len = None pieces = text.split('---', 2) if len(pieces) == 3: metadata_raw = pieces[1] text = pieces[2] try: metadata_yaml = yaml.load(metadata_raw) except yaml.YAMLError as e: print('Unable to parse YAML header in {0}:\n{1}'.format(path, e), file=sys.stderr) sys.exit(1) return metadata_raw, metadata_yaml, text def load_yaml(filename): """ Wrapper around YAML loading so that 'import yaml' and error handling is only needed in one place. """ with open(filename, 'r') as reader: return yaml.load(reader)
bin/workshop_check.py +126 −155 Original line number Diff line number Diff line Loading @@ -7,37 +7,21 @@ docstrings on the checking functions for a summary of the checks. import sys import os import re import logging import yaml from collections import Counter __version__ = '0.6' # basic logging configuration logger = logging.getLogger(__name__) verbosity = logging.INFO # severity of at least INFO will emerge logger.setLevel(verbosity) # create console handler and set level to debug console_handler = logging.StreamHandler() console_handler.setLevel(verbosity) formatter = logging.Formatter('%(levelname)s: %(message)s') console_handler.setFormatter(formatter) logger.addHandler(console_handler) from datetime import date from util import Reporter, split_metadata # TODO: these regexp patterns need comments inside # Metadata field patterns. EMAIL_PATTERN = r'[^@]+@[^@]+\.[^@]+' HUMANTIME_PATTERN = r'((0?[1-9]|1[0-2]):[0-5]\d(am|pm)(-|to)(0?[1-9]|1[0-2]):[0-5]\d(am|pm))|((0?\d|1\d|2[0-3]):[0-5]\d(-|to)(0?\d|1\d|2[0-3]):[0-5]\d)' EVENTBRITE_PATTERN = r'\d{9,10}' URL_PATTERN = r'https?://.+' # Defaults. CARPENTRIES = ("dc", "swc") DEFAULT_CONTACT_EMAIL = 'admin@software-carpentry.org' USAGE = 'Usage: "check-workshop path/to/index.html"\n' USAGE = 'Usage: "check-workshop path/to/root/directory"' # Country and language codes. Note that codes mean different things: 'ar' # is 'Arabic' as a language but 'Argentina' as a country. Loading Loading @@ -86,18 +70,9 @@ ISO_LANGUAGE = [ ] def add_error(msg, errors): """Add error to the list of errors.""" errors.append(msg) def add_suberror(msg, errors): """Add sub error, ie. error indented by 1 level ("\t"), to the list of errors.""" errors.append("\t{0}".format(msg)) def look_for_fixme(func): '''Decorator to fail test if text argument starts with "FIXME".''' """Decorator to fail test if text argument starts with "FIXME".""" def inner(arg): if (arg is not None) and \ isinstance(arg, str) and \ Loading Loading @@ -137,24 +112,26 @@ def check_language(language): @look_for_fixme def check_humandate(date): '''"humandate" must be a human-readable date with a 3-letter month and 4-digit year. Examples include "Feb 18-20, 2025" and "Feb 18 and 20, 2025". It may be in languages other than English, but the """ 'humandate' must be a human-readable date with a 3-letter month and 4-digit year. Examples include 'Feb 18-20, 2025' and 'Feb 18 and 20, 2025'. It may be in languages other than English, but the month name should be kept short to aid formatting of the main Software Carpentry web site.''' Software Carpentry web site. """ if "," not in date: if ',' not in date: return False month_dates, year = date.split(",") month_dates, year = date.split(',') # The first three characters of month_dates are not empty month = month_dates[:3] if any(char == " " for char in month): if any(char == ' ' for char in month): return False # But the fourth character is empty ("February" is illegal) if month_dates[3] != " ": if month_dates[3] != ' ': return False # year contains *only* numbers Loading @@ -168,65 +145,80 @@ def check_humandate(date): @look_for_fixme def check_humantime(time): '''"humantime" is a human-readable start and end time for the workshop, such as "09:00 - 16:00".''' """ 'humantime' is a human-readable start and end time for the workshop, such as '09:00 - 16:00'. """ return bool(re.match(HUMANTIME_PATTERN, time.replace(" ", ""))) return bool(re.match(HUMANTIME_PATTERN, time.replace(' ', ''))) def check_date(this_date): '''"startdate" and "enddate" are machine-readable start and end dates for the workshop, and must be in YYYY-MM-DD format, e.g., "2015-07-01".''' """ 'startdate' and 'enddate' are machine-readable start and end dates for the workshop, and must be in YYYY-MM-DD format, e.g., '2015-07-01'. """ from datetime import date # yaml automatically loads valid dates as datetime.date # YAML automatically loads valid dates as datetime.date. return isinstance(this_date, date) @look_for_fixme def check_latitude_longitude(latlng): '''"latlng" must be a valid latitude and longitude represented as two floating-point numbers separated by a comma.''' """ 'latlng' must be a valid latitude and longitude represented as two floating-point numbers separated by a comma. """ try: lat, lng = latlng.split(',') lat = float(lat) long = float(lng) return (-90.0 <= lat <= 90.0) and (-180.0 <= long <= 180.0) except ValueError: return False return (-90.0 <= lat <= 90.0) and (-180.0 <= long <= 180.0) def check_instructors(instructors): '''"instructor" must be a non-empty comma-separated list of quoted names, e.g. ['First name', 'Second name', ...']. Do not use "TBD" or other placeholders.''' """ 'instructor' must be a non-empty comma-separated list of quoted names, e.g. ['First name', 'Second name', ...']. Do not use 'TBD' or other placeholders. """ # yaml automatically loads list-like strings as lists # YAML automatically loads list-like strings as lists. return isinstance(instructors, list) and len(instructors) > 0 def check_helpers(helpers): '''"helper" must be a comma-separated list of quoted names, e.g. ['First name', 'Second name', ...']. The list may be empty. Do not use "TBD" or other placeholders.''' """ 'helper' must be a comma-separated list of quoted names, e.g. ['First name', 'Second name', ...']. The list may be empty. Do not use 'TBD' or other placeholders. """ # yaml automatically loads list-like strings as lists # YAML automatically loads list-like strings as lists. return isinstance(helpers, list) and len(helpers) >= 0 @look_for_fixme def check_email(email): '''"contact" must be a valid email address consisting of characters, a @, and more characters. It should not be the default contact email address "admin@software-carpentry.org".''' """ 'contact' must be a valid email address consisting of characters, an '@', and more characters. It should not be the default contact email address 'admin@software-carpentry.org'. """ return bool(re.match(EMAIL_PATTERN, email)) and \ (email != DEFAULT_CONTACT_EMAIL) def check_eventbrite(eventbrite): '''"eventbrite" (the Eventbrite registration key) must be 9 or more digits.''' """ 'eventbrite' (the Eventbrite registration key) must be 9 or more digits. It may appear as an integer or as a string. """ if isinstance(eventbrite, int): return True Loading @@ -236,15 +228,19 @@ def check_eventbrite(eventbrite): @look_for_fixme def check_etherpad(etherpad): '''"etherpad" must be a valid URL.''' """ 'etherpad' must be a valid URL. """ return bool(re.match(URL_PATTERN, etherpad)) @look_for_fixme def check_pass(value): '''This test always passes (it is used for "checking" things like addresses, for which no sensible validation is feasible).''' """ This test always passes (it is used for 'checking' things like the workshop address, for which no sensible validation is feasible). """ return True Loading @@ -265,38 +261,38 @@ HANDLERS = { 'humandate': (True, check_humandate, 'humandate invalid. Please use three-letter months like ' + '"Jan" and four-letter years like "2025".'), '"Jan" and four-letter years like "2025"'), 'humantime': (True, check_humantime, 'humantime doesn\'t include numbers'), 'startdate': (True, check_date, 'startdate invalid. Must be of format year-month-day, ' + 'i.e., 2014-01-31.'), 'i.e., 2014-01-31'), 'enddate': (False, check_date, 'enddate invalid. Must be of format year-month-day, i.e.,' + ' 2014-01-31.'), ' 2014-01-31'), 'latlng': (True, check_latitude_longitude, 'latlng invalid. Check that it is two floating point ' + 'numbers, separated by a comma.'), 'numbers, separated by a comma'), 'instructor': (True, check_instructors, 'instructor list isn\'t a valid list of format ' + '["First instructor", "Second instructor",..].'), '["First instructor", "Second instructor",..]'), 'helper': (True, check_helpers, 'helper list isn\'t a valid list of format ' + '["First helper", "Second helper",..].'), '["First helper", "Second helper",..]'), 'contact': (True, check_email, 'contact email invalid or still set to ' + '"{0}".'.format(DEFAULT_CONTACT_EMAIL)), 'eventbrite': (False, check_eventbrite, 'Eventbrite key appears invalid.'), 'eventbrite': (False, check_eventbrite, 'Eventbrite key appears invalid'), 'etherpad': (False, check_etherpad, 'Etherpad URL appears invalid.'), 'etherpad': (False, check_etherpad, 'Etherpad URL appears invalid'), 'venue': (False, check_pass, 'venue name not specified'), Loading @@ -310,102 +306,85 @@ REQUIRED = set([k for k in HANDLERS if HANDLERS[k][0]]) OPTIONAL = set([k for k in HANDLERS if not HANDLERS[k][0]]) def check_validity(data, function, errors, error_msg): '''Wrapper-function around the various check-functions.''' valid = function(data) if not valid: add_error(error_msg, errors) add_suberror('Offending entry is: "{0}"'.format(data), errors) return valid def check_blank_lines(reporter, raw): """ Blank lines are not allowed in category headers. """ def check_blank_lines(raw_data, errors, error_msg): '''Blank lines are not allowed in category headers.''' lines = [x.strip() for x in raw_data.split('\n')] if '' in lines: add_error(error_msg, errors) add_suberror('{0} blank lines found in header'.format(lines.count('')), errors) return False return True def check_categories(left, right, errors, error_msg): '''Report set difference of categories.''' result = left - right if result: add_error(error_msg, errors) add_suberror('Offending entries: {0}'.format(result), errors) return False return True lines = [(i, x) for (i, x) in enumerate(raw.strip().split('\n')) if not x.strip()] reporter.check(not lines, None, 'Blank line(s) in header: {0}', ', '.join(["{0}: {1}".format(i, x.rstrip()) for (i, x) in lines])) def get_header(text): '''Extract YAML header from raw data, returning (None, None) if no valid header found and (raw, parsed) if header found.''' def check_categories(reporter, left, right, msg): """ Report differences (if any) between two sets of categories. """ # YAML header must be right at the start of the file. if not text.startswith('---'): return None, None diff = left - right reporter.check(len(diff) == 0, None, '{0}: offending entries {1}', msg, sorted(list(diff))) # YAML header must start and end with '---' pieces = text.split('---') if len(pieces) < 3: return None, None # Return raw text and YAML-ized form. raw = pieces[1].strip() return raw, yaml.load(raw) def check_file(reporter, path, data): """ Get header from file, call all other functions, and check file for validity. """ def check_file(filename, data, errors): '''Get header from file, call all other functions and check file for validity. Return list of errors (empty when no errors).''' raw, header = get_header(data) if header is None: msg = ('Cannot find YAML header in given file "{0}".'.format(filename)) add_error(msg, errors) return errors # Get metadata as text and as YAML. raw, header, body = split_metadata(path, data) # Do we have any blank lines in the header? is_valid = check_blank_lines(raw, errors, 'There are blank lines in the header') check_blank_lines(reporter, raw) # Look through all header entries. If the category is in the input # file and is either required or we have actual data (as opposed to # a commented-out entry), we check it. If it *isn't* in the header # but is required, report an error. for category in HANDLERS: required, handler_function, error_message = HANDLERS[category] required, handler, message = HANDLERS[category] if category in header: if required or header[category]: is_valid &= check_validity(header[category], handler_function, errors, error_message) reporter.check(handler(header[category]), None, '{0}\n actual value "{1}"', message, header[category]) elif required: msg = 'index file is missing mandatory key "{0}"'.format(category) add_error(msg, errors) is_valid = False reporter.add(None, 'Missing mandatory key "{0}"', category) # Check whether we have missing or too many categories seen_categories = set(header.keys()) check_categories(reporter, REQUIRED, seen_categories, 'Missing categories') check_categories(reporter, seen_categories, REQUIRED.union(OPTIONAL), 'Superfluous categories') is_valid &= check_categories(REQUIRED, seen_categories, errors, 'There are missing categories') is_valid &= check_categories(seen_categories, REQUIRED.union(OPTIONAL), errors, 'There are superfluous categories') def check_config(reporter, filename): """ Check YAML configuration file. """ def check_config(filename, errors): '''Check YAML configuration file.''' config = load_yaml(filename) with open(filename, 'r') as reader: config = yaml.load(reader) kind = config.get('kind', None) reporter.check(kind == 'workshop', filename, 'Missing or unknown kind of event: {0}', kind) if config['kind'] != 'workshop': msg = 'Not configured as a workshop: found "{0}" instead'.format(config['kind']) add_error(msg, errors) carpentry = config.get('carpentry', None) reporter.check(carpentry in ('swc', 'dc'), filename, 'Missing or unknown carpentry: {0}', carpentry) def main(): Loading @@ -418,21 +397,13 @@ def main(): root_dir = sys.argv[1] index_file = os.path.join(root_dir, 'index.html') config_file = os.path.join(root_dir, '_config.yml') logger.info('Testing "{0}" and "{1}"'.format(index_file, config_file)) errors = [] check_config(config_file, errors) reporter = Reporter() check_config(reporter, config_file) with open(index_file) as reader: data = reader.read() check_file(index_file, data, errors) if errors: for m in errors: logger.error(m) sys.exit(1) else: logger.info('Everything seems to be in order') sys.exit(0) check_file(reporter, index_file, data) reporter.report() if __name__ == '__main__': Loading