// Mantid Repository : https://github.com/mantidproject/mantid // // Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI, // NScD Oak Ridge National Laboratory, European Spallation Source // & Institut Laue - Langevin // SPDX - License - Identifier: GPL - 3.0 + //---------------------------------------------------------------------- // Includes //---------------------------------------------------------------------- #include "MantidAPI/FileFinder.h" #include "MantidAPI/ArchiveSearchFactory.h" #include "MantidAPI/FrameworkManager.h" #include "MantidAPI/IArchiveSearch.h" #include "MantidKernel/ConfigService.h" #include "MantidKernel/Exception.h" #include "MantidKernel/FacilityInfo.h" #include "MantidKernel/Glob.h" #include "MantidKernel/InstrumentInfo.h" #include "MantidKernel/Strings.h" #include <MantidKernel/StringTokenizer.h> #include <Poco/Exception.h> #include <Poco/File.h> #include <Poco/Path.h> #include <boost/lexical_cast.hpp> #include <boost/regex.hpp> #include <algorithm> #include <cctype> #include <boost/algorithm/string.hpp> namespace { /// static logger object Mantid::Kernel::Logger g_log("FileFinder"); /** * Unary predicate for use with remove_if. Checks for the existance of * a "*" wild card in the file extension string passed to it. * * @param ext :: the extension to check. * * @returns true if extension contains a "*", else false. */ bool containsWildCard(const std::string &ext) { return std::string::npos != ext.find('*'); } } // namespace namespace Mantid { namespace API { using std::string; // this allowed string could be made into an array of allowed, currently used // only by the ISIS SANS group const std::string FileFinderImpl::ALLOWED_SUFFIX = "-add"; //---------------------------------------------------------------------- // Public member functions //---------------------------------------------------------------------- /** * Default constructor */ FileFinderImpl::FileFinderImpl() { // Make sure plugins are loaded FrameworkManager::Instance().loadPlugins(); // determine from Mantid property how sensitive Mantid should be #ifdef _WIN32 m_globOption = Poco::Glob::GLOB_DEFAULT; #else setCaseSensitive(Kernel::ConfigService::Instance() .getValue<bool>("filefinder.casesensitive") .get_value_or(false)); #endif } /** * Option to set if file finder should be case sensitive * @param cs :: If true then set to case sensitive */ void FileFinderImpl::setCaseSensitive(const bool cs) { if (cs) m_globOption = Poco::Glob::GLOB_DEFAULT; else m_globOption = Poco::Glob::GLOB_CASELESS; } /** * Option to get if file finder should be case sensitive * @return cs :: If case sensitive return true, if not case sensitive return * false */ bool FileFinderImpl::getCaseSensitive() const { return (m_globOption == Poco::Glob::GLOB_DEFAULT); } /** * Return the full path to the file given its name * @param filename :: A file name (without path) including extension * @param ignoreDirs :: If true, directories that match are skipped unless the * path given is already absolute * @return The full path if the file exists and can be found in one of the * search locations * or an empty string otherwise. */ std::string FileFinderImpl::getFullPath(const std::string &filename, const bool ignoreDirs) const { return Kernel::ConfigService::Instance().getFullPath(filename, ignoreDirs, m_globOption); } /** Run numbers can be followed by an allowed string. Check if there is * one, remove it from the name and return the string, else return empty * @param userString run number that may have a suffix * @return the suffix, if there was one */ std::string FileFinderImpl::extractAllowedSuffix(std::string &userString) const { if (userString.find(ALLOWED_SUFFIX) == std::string::npos) { // short cut processing as normally there is no suffix return ""; } // ignore any file extension in checking if a suffix is present Poco::Path entry(userString); std::string noExt(entry.getBaseName()); const size_t repNumChars = ALLOWED_SUFFIX.size(); if (noExt.find(ALLOWED_SUFFIX) == noExt.size() - repNumChars) { userString.replace(userString.size() - repNumChars, repNumChars, ""); return ALLOWED_SUFFIX; } return ""; } /** * Return the InstrumentInfo as determined from the hint. * * @param hint :: The name hint. * @return This will return the default instrument if it cannot be determined. */ const Kernel::InstrumentInfo FileFinderImpl::getInstrument(const string &hint) const { if ((!hint.empty()) && (!isdigit(hint[0]))) { string instrName(hint); Poco::Path path(instrName); instrName = path.getFileName(); if ((instrName.find("PG3") == 0) || (instrName.find("pg3") == 0)) { instrName = "PG3"; } // We're extending this nasty hack to accomodate data archive searching for // SANS2D. // While this certainly shouldn't be considered good practice, #7515 exists // to // completely redesign FileFinder -- this quick fix will have to do until // all this // code gets an overhaul as part of that ticket. Please think twice before // adding // any more instruments to this list. else if ((instrName.find("SANS2D") == 0) || (instrName.find("sans2d") == 0)) { instrName = "SANS2D"; } else { // go forwards looking for the run number to start { string::const_iterator it = std::find_if( instrName.begin(), instrName.end(), std::ptr_fun(isdigit)); std::string::size_type nChars = std::distance( static_cast<string::const_iterator>(instrName.begin()), it); instrName = instrName.substr(0, nChars); } // go backwards looking for the instrument name to end - gets around // delimiters if (!instrName.empty()) { string::const_reverse_iterator it = std::find_if( instrName.rbegin(), instrName.rend(), std::ptr_fun(isalpha)); string::size_type nChars = std::distance( it, static_cast<string::const_reverse_iterator>(instrName.rend())); instrName = instrName.substr(0, nChars); } } try { const Kernel::InstrumentInfo instrument = Kernel::ConfigService::Instance().getInstrument(instrName); return instrument; } catch (Kernel::Exception::NotFoundError &e) { g_log.debug() << e.what() << "\n"; } } return Kernel::ConfigService::Instance().getInstrument(); } /** * Extracts the instrument name and run number from a hint * @param hint :: The name hint * @return A pair of instrument name and run number */ std::pair<std::string, std::string> FileFinderImpl::toInstrumentAndNumber(const std::string &hint) const { // g_log.debug() << "toInstrumentAndNumber(" << hint << ")\n"; std::string instrPart; std::string runPart; if (isdigit(hint[0])) { instrPart = Kernel::ConfigService::Instance().getInstrument().shortName(); runPart = hint; } else { /// Find the last non-digit as the instrument name can contain numbers std::string::const_reverse_iterator it = std::find_if( hint.rbegin(), hint.rend(), std::not1(std::ptr_fun(isdigit))); // No non-digit or all non-digits if (it == hint.rend() || it == hint.rbegin()) { throw std::invalid_argument( "Malformed hint to FileFinderImpl::makeFileName: " + hint); } std::string::size_type nChars = std::distance(it, hint.rend()); // Add in special test for PG3 if (boost::algorithm::istarts_with(hint, "PG3")) { instrPart = "PG3"; nChars = instrPart.length(); } // Another nasty check for SANS2D. Will do until FileFinder redesign. else if (boost::algorithm::istarts_with(hint, "SANS2D")) { instrPart = "SANS2D"; nChars = instrPart.length(); } else { instrPart = hint.substr(0, nChars); } runPart = hint.substr(nChars); } unsigned int irunPart(0); try { irunPart = boost::lexical_cast<unsigned int>(runPart); } catch (boost::bad_lexical_cast &) { std::ostringstream os; os << "Cannot convert '" << runPart << "' to run number."; throw std::invalid_argument(os.str()); } Kernel::InstrumentInfo instr = Kernel::ConfigService::Instance().getInstrument(instrPart); size_t nZero = instr.zeroPadding(irunPart); // remove any leading zeros in case there are too many of them std::string::size_type i = runPart.find_first_not_of('0'); runPart.erase(0, i); while (runPart.size() < nZero) runPart.insert(0, "0"); if (runPart.size() > nZero && nZero != 0) { throw std::invalid_argument( "Run number does not match instrument's zero padding"); } instrPart = instr.filePrefix(irunPart); return std::make_pair(instrPart, runPart); } /** * Make a data file name (without extension) from a hint. The hint can be either * a run number or * a run number prefixed with an instrument name/short name. If the instrument * name is absent the default one is used. * @param hint :: The name hint * @param instrument :: The current instrument object * @return The file name * @throw NotFoundError if a required default is not set * @throw std::invalid_argument if the argument is malformed or run number is * too long */ std::string FileFinderImpl::makeFileName(const std::string &hint, const Kernel::InstrumentInfo &instrument) const { // g_log.debug() << "makeFileName(" << hint << ", " << instrument.shortName() // << ")\n"; if (hint.empty()) return ""; std::string filename(hint); const std::string suffix = extractAllowedSuffix(filename); const std::string shortName = instrument.shortName(); std::string delimiter = instrument.delimiter(); // see if starts with the provided instrument name if (filename.substr(0, shortName.size()) == shortName) { filename = filename.substr(shortName.size()); if ((!delimiter.empty()) && (filename.substr(0, delimiter.size()) == delimiter)) filename = filename.substr(delimiter.size()); filename = shortName + filename; } std::pair<std::string, std::string> p = toInstrumentAndNumber(filename); filename = p.first; if (!delimiter.empty()) { filename += delimiter; } filename += p.second; if (!suffix.empty()) { filename += suffix; } return filename; } /** * Determine the extension from a filename. * * @param filename The filename to get the extension from. * @param exts The list of extensions to try before giving up and * using the default: whatever happens after the '.'. * * @return The extension. If one isn't determined it is an empty string. */ std::string FileFinderImpl::getExtension(const std::string &filename, const std::vector<std::string> &exts) const { g_log.debug() << "getExtension(" << filename << ", exts[" << exts.size() << "])\n"; // go through the list of supplied extensions for (const auto &ext : exts) { std::string extension = toUpper(ext); if (extension.rfind('*') == extension.size() - 1) // there is a wildcard at play { extension = extension.substr(0, extension.rfind('*')); } std::size_t found = toUpper(filename).rfind(extension); if (found != std::string::npos) { g_log.debug() << "matched extension \"" << extension << "\" based on \"" << ext << "\"\n"; return filename.substr(found); // grab the actual extensions found } } g_log.debug() << "Failed to find extension. Just using last \'.\'\n"; std::size_t pos = filename.find_last_of('.'); if (pos != std::string::npos) { return filename.substr(pos); } // couldn't find an extension return ""; } std::vector<IArchiveSearch_sptr> FileFinderImpl::getArchiveSearch(const Kernel::FacilityInfo &facility) const { std::vector<IArchiveSearch_sptr> archs; // get the searchive option from config service and format it std::string archiveOpt = Kernel::ConfigService::Instance().getString("datasearch.searcharchive"); std::transform(archiveOpt.begin(), archiveOpt.end(), archiveOpt.begin(), tolower); // if it is turned off, not specified, or the facility doesn't have // IArchiveSearch defined, return an empty vector if (archiveOpt.empty() || archiveOpt == "off" || facility.archiveSearch().empty()) return archs; // determine if the user wants archive search for this facility bool createArchiveSearch = bool(archiveOpt == "all"); // then see if the facility name appears in the list or if we just want the // default facility if (!createArchiveSearch) { std::string faciltyName = facility.name(); std::transform(faciltyName.begin(), faciltyName.end(), faciltyName.begin(), tolower); if (archiveOpt == "on") { // only default facilty std::string defaultFacility = Kernel::ConfigService::Instance().getString("default.facility"); std::transform(defaultFacility.begin(), defaultFacility.end(), defaultFacility.begin(), tolower); createArchiveSearch = bool(faciltyName == defaultFacility); } else { // everything in the list createArchiveSearch = bool(archiveOpt.find(faciltyName) != std::string::npos); } } // put together the list of IArchiveSearch to use if (createArchiveSearch) { for (const auto &facilityname : facility.archiveSearch()) { g_log.debug() << "get archive search for the facility..." << facilityname << "\n"; archs.push_back(ArchiveSearchFactory::Instance().create(facilityname)); } } return archs; } std::string FileFinderImpl::findRun(const std::string &hintstr, const std::vector<std::string> &exts) const { std::string hint = Kernel::Strings::strip(hintstr); g_log.debug() << "vector findRun(\'" << hint << "\', exts[" << exts.size() << "])\n"; // if partial filename or run number is not supplied, return here if (hint.empty()) return ""; // if it looks like a full filename just do a quick search for it Poco::Path hintPath(hint); if (!hintPath.getExtension().empty()) { // check in normal search locations g_log.debug() << "hintPath is not empty, check in normal search locations" << "\n"; std::string path = getFullPath(hint); if (!path.empty()) { try { if (Poco::File(path).exists()) { g_log.information() << "found path = " << path << '\n'; return path; } } catch (Poco::Exception &) { } } else { g_log.debug() << "Unable to find files via directory search with the " "filename that looks like a full filename" << "\n"; } } // get instrument and facility const Kernel::InstrumentInfo instrument = this->getInstrument(hint); const Kernel::FacilityInfo &facility = instrument.facility(); // get facility extensions const std::vector<std::string> facility_extensions = facility.extensions(); // select allowed extensions std::vector<std::string> extensions; g_log.debug() << "Add facility extensions defined in the Facility.xml file" << "\n"; extensions.assign(facility_extensions.begin(), facility_extensions.end()); // Do we need to try and form a filename from our preset rules std::string filename(hint); std::string extension = getExtension(hint, extensions); if (!extensions.empty()) filename = hint.substr(0, hint.rfind(extension)); if (hintPath.depth() == 0) { try { if (!facility.noFilePrefix()) { filename = makeFileName(filename, instrument); } } catch (std::invalid_argument &) { if (filename.length() >= hint.length()) { g_log.information() << "Could not form filename from standard rules '" << filename << "'\n"; } } } // Look first at the original filename then for case variations. This is // important // on platforms where file names ARE case sensitive. // Sorry for the duplication, a last minute fix was required. Ticket #6419 is // tasked with a redesign of // the whole file finding concept. std::set<std::string> filenames; filenames.insert(filename); if (!getCaseSensitive()) { std::string transformed(filename); std::transform(filename.begin(), filename.end(), transformed.begin(), toupper); filenames.insert(transformed); std::transform(filename.begin(), filename.end(), transformed.begin(), tolower); filenames.insert(transformed); } // Merge the extensions & throw out duplicates // On Windows throw out ones that only vary in case // std::vector<std::string> uniqueExts; // uniqueExts.reserve(1 + exts.size() + extensions.size()); std::set<std::string> uniqueExtsSet; if (!extension.empty()) uniqueExtsSet.insert(extension); getUniqueExtensions(exts, uniqueExtsSet); getUniqueExtensions(extensions, uniqueExtsSet); std::vector<std::string> uniqueExts; for (const auto &it : uniqueExtsSet) { uniqueExts.push_back(it); } // determine which archive search facilities to use std::vector<IArchiveSearch_sptr> archs = getArchiveSearch(facility); std::string path = getPath(archs, filenames, uniqueExts); if (!path.empty()) { g_log.information() << "found path = " << path << '\n'; return path; } else { g_log.information() << "Unable to find run with hint " << hint << "\n"; } g_log.information() << "Unable to find file path for " << hint << "\n"; return ""; } /** * Given a set of already determined extensions and new extensions, * create a set of all extensions. * If not in an extension-is-case-sensitive environment, only add the * lower case OR upper case version of the extension * @param exts :: a vector of extensions to add * @param uniqueExts :: a set of currently included extensions */ void FileFinderImpl::getUniqueExtensions( const std::vector<std::string> &exts, std::set<std::string> &uniqueExts) const { for (const auto &cit : exts) { if (!getCaseSensitive()) // prune case variations - this is a hack, see // findRun { std::string transformed(cit); std::transform(cit.begin(), cit.end(), transformed.begin(), tolower); auto searchItr = uniqueExts.find(cit); if (searchItr != uniqueExts.end()) continue; std::transform(cit.begin(), cit.end(), transformed.begin(), toupper); uniqueExts.insert(cit); } else { uniqueExts.insert(cit); } } } /** * Find a list of files file given a hint. Calls findRun internally. * @param hintstr :: Comma separated list of hints to findRun method. * Can also include ranges of runs, e.g. 123-135 or equivalently 123-35. * Only the beginning of a range can contain an instrument name. * @param exts :: Vector of allowed file extensions. Optional. * If provided, this provides the only extensions searched for. * If not provided, facility extensions used. * @return A vector of full paths or empty vector * @throw std::invalid_argument if the argument is malformed * @throw Exception::NotFoundError if a file could not be found */ std::vector<std::string> FileFinderImpl::findRuns(const std::string &hintstr, const std::vector<std::string> &exts) const { std::string hint = Kernel::Strings::strip(hintstr); g_log.debug() << "findRuns hint = " << hint << "\n"; std::vector<std::string> res; Mantid::Kernel::StringTokenizer hints( hint, ",", Mantid::Kernel::StringTokenizer::TOK_TRIM | Mantid::Kernel::StringTokenizer::TOK_IGNORE_EMPTY); static const boost::regex digits("[0-9]+"); auto h = hints.begin(); for (; h != hints.end(); ++h) { // Quick check for a filename bool fileSuspected = false; // Assume if the hint contains either a "/" or "\" it is a filename.. if ((*h).find("\\") != std::string::npos) { fileSuspected = true; } if ((*h).find("/") != std::string::npos) { fileSuspected = true; } if ((*h).find(ALLOWED_SUFFIX) != std::string::npos) { fileSuspected = true; } Mantid::Kernel::StringTokenizer range( *h, "-", Mantid::Kernel::StringTokenizer::TOK_TRIM | Mantid::Kernel::StringTokenizer::TOK_IGNORE_EMPTY); if ((range.count() > 2) && (!fileSuspected)) { throw std::invalid_argument("Malformed range of runs: " + *h); } else if ((range.count() == 2) && (!fileSuspected)) { std::pair<std::string, std::string> p1 = toInstrumentAndNumber(range[0]); std::string run = p1.second; size_t nZero = run.size(); // zero padding if (range[1].size() > nZero) { throw std::invalid_argument("Malformed range of runs: " + *h + ". The end of string value is longer than " "the instrument's zero padding"); } int runNumber = boost::lexical_cast<int>(run); std::string runEnd = run; // Adds zero padding to end of range. runEnd.replace(runEnd.end() - range[1].size(), runEnd.end(), range[1]); // Throw if runEnd contains something else other than a digit. if (!boost::regex_match(runEnd, digits)) throw std::invalid_argument("Malformed range of runs: Part of the run " "has a non-digit character in it."); int runEndNumber = boost::lexical_cast<int>(runEnd); if (runEndNumber < runNumber) { throw std::invalid_argument("Malformed range of runs: " + *h); } for (int irun = runNumber; irun <= runEndNumber; ++irun) { run = std::to_string(irun); while (run.size() < nZero) run.insert(0, "0"); std::string path = findRun(p1.first + run, exts); if (!path.empty()) { res.push_back(path); } else { throw Kernel::Exception::NotFoundError("Unable to find file:", run); } } } else { std::string path = findRun(*h, exts); if (!path.empty()) { res.push_back(path); } else { throw Kernel::Exception::NotFoundError("Unable to find file:", *h); } } } return res; } /** * Return the path to the file found in archive * @param archs :: A list of archives to search * @param filenames :: A list of filenames (without extensions) to pass to the * archive * @param exts :: A list of extensions to check for in turn against each file * @return The full path if the file exists and can be found in one of the * search locations * or an empty string otherwise. */ std::string FileFinderImpl::getArchivePath(const std::vector<IArchiveSearch_sptr> &archs, const std::set<std::string> &filenames, const std::vector<std::string> &exts) const { g_log.debug() << "getArchivePath([IArchiveSearch_sptr], [ "; for (const auto &iter : filenames) g_log.debug() << iter << " "; g_log.debug() << "], [ "; for (const auto &iter : exts) g_log.debug() << iter << " "; g_log.debug() << "])\n"; std::string path; for (const auto &arch : archs) { try { g_log.debug() << "Getting archive path for requested files\n"; path = arch->getArchivePath(filenames, exts); if (!path.empty()) { return path; } } catch (...) { } } return path; } /** * Return the full path to the file given its name, checking local directories * first. * @param archs :: A list of archives to search * @param filenames :: A list of filenames (without extensions) to pass to the * archive * @param exts :: A list of extensions to check for in turn against each file * @return The full path if the file exists and can be found in one of the * search locations * or an empty string otherwise. */ std::string FileFinderImpl::getPath(const std::vector<IArchiveSearch_sptr> &archs, const std::set<std::string> &filenames, const std::vector<std::string> &exts) const { std::string path; std::vector<std::string> extensions; extensions.assign(exts.begin(), exts.end()); // Remove wild cards. extensions.erase( std::remove_if(extensions.begin(), extensions.end(), containsWildCard), extensions.end()); const std::vector<std::string> &searchPaths = Kernel::ConfigService::Instance().getDataSearchDirs(); // Before we try any globbing, make sure we exhaust all reasonable attempts at // constructing the possible filename. // Avoiding the globbing of getFullPath() for as long as possible will help // performance when calling findRuns() // with a large range of files, especially when searchPaths consists of // folders containing a large number of runs. for (auto &extension : extensions) { for (const auto &filename : filenames) { for (const auto &searchPath : searchPaths) { try { Poco::Path path(searchPath, filename + extension); Poco::File file(path); if (file.exists()) return path.toString(); } catch (Poco::Exception &) { /* File does not exist, just carry on. */ } } } } for (const auto &extension : extensions) { for (const auto &filename : filenames) { path = getFullPath(filename + extension); try { if (!path.empty() && Poco::File(path).exists()) { g_log.debug() << "path returned from getFullPath() = " << path << '\n'; return path; } } catch (std::exception &e) { g_log.error() << "Cannot open file " << path << ": " << e.what() << '\n'; return ""; } } } // Search the archive if (!archs.empty()) { g_log.debug() << "Search the archives\n"; std::string path = getArchivePath(archs, filenames, exts); try { if (!path.empty() && Poco::File(path).exists()) { return path; } } catch (std::exception &e) { g_log.error() << "Cannot open file " << path << ": " << e.what() << '\n'; return ""; } } // archs return ""; } std::string FileFinderImpl::toUpper(const std::string &src) const { std::string result = src; std::transform(result.begin(), result.end(), result.begin(), toupper); return result; } } // namespace API } // namespace Mantid