# This was written for Python 2.7 # You are likely doing unscripted testing for MantidPlot/Workbench when using this script so basically follow this # advice as well as using this script and it's results. (Confirm these all work as well) # - Algorithm dialog snapshots should appear on algorithm pages in offline help # - Math formulae should appear on algorithm pages in offline help # - workflow diagrams should appear on algorithm pages in offline help # # Author: Samuel Jones - ISIS # Need to install BeautifulSoup: # pip install beautifulsoup from BeautifulSoup import BeautifulSoup import urllib2 import re import webbrowser import time import argparse def crawl_url_for_html_addons(url): parent_url = url parent_url = re.sub('index.html$', '', parent_url) html_page = urllib2.urlopen(url) soup = BeautifulSoup(html_page) urls = [] for link in soup.findAll('a', attrs={'href': re.compile(".html")}): html_ref = link.get('href') urls.append(parent_url + html_ref) return urls def open_urls(list_of_urls, delay=1): """ :param list_of_urls: :param delay: in seconds :return: """ for url in list_of_urls: time.sleep(delay) webbrowser.open(url) parser = argparse.ArgumentParser() parser.add_argument( '-d', '--open-tab-delay', type=int, help="Delay between each new page tab in seconds.") args = parser.parse_args() print(args.delay) all_urls = [] print("Crawling for Algorithm URLs...") algorithm_urls = crawl_url_for_html_addons("http://docs.mantidproject.org/nightly/algorithms/index.html") all_urls.extend(algorithm_urls) print("Crawling for Concept URLs...") concept_urls = crawl_url_for_html_addons("http://docs.mantidproject.org/nightly/concepts/index.html") all_urls.extend(concept_urls) print("Crawling for Interface URLs...") interface_urls = crawl_url_for_html_addons("http://docs.mantidproject.org/nightly/interfaces/index.html") all_urls.extend(interface_urls) print("Crawling for Technique URLs...") technique_urls = crawl_url_for_html_addons("http://docs.mantidproject.org/nightly/techniques/index.html") all_urls.extend(technique_urls) print("Crawling python api...") mantid_kernel_urls = crawl_url_for_html_addons("http://docs.mantidproject.org/nightly/api/python/mantid/kernel/" "index.html") mantid_geometry_urls = crawl_url_for_html_addons("http://docs.mantidproject.org/nightly/api/python/mantid/geometry/" "index.html") mantid_api_urls = crawl_url_for_html_addons("http://docs.mantidproject.org/nightly/api/python/mantid/api/" "index.html") # Only one mantid_plots_urls = ["http://docs.mantidproject.org/nightly/api/python/mantid/plots/index.html"] # Only one mantid_simpleapi_urls = ["http://docs.mantidproject.org/nightly/api/python/mantid/simpleapi.html"] # Only one mantid_fitfunctions = ["http://docs.mantidproject.org/nightly/api/python/mantid/fitfunctions.html"] mantidplot_urls = crawl_url_for_html_addons("http://docs.mantidproject.org/nightly/api/python/mantidplot/index.html") all_urls.extend(mantid_api_urls) all_urls.extend(mantid_fitfunctions) all_urls.extend(mantid_geometry_urls) all_urls.extend(mantid_kernel_urls) all_urls.extend(mantid_plots_urls) all_urls.extend(mantid_simpleapi_urls) all_urls.extend(mantidplot_urls) print("All webpages crawled") print("Opening Urls...") delay = args.delay if delay is None: delay = 1 open_urls(all_urls, delay) print("All URLs opened")