Newer
Older
//----------------------------------------------------------------------
// Includes
//----------------------------------------------------------------------
#include "MantidAPI/FileFinder.h"
#include "MantidAPI/IArchiveSearch.h"
#include "MantidAPI/ArchiveSearchFactory.h"
#include "MantidKernel/ConfigService.h"
Peterson, Peter
committed
#include "MantidKernel/Exception.h"
#include "MantidKernel/FacilityInfo.h"
#include "MantidKernel/InstrumentInfo.h"
#include "MantidKernel/LibraryManager.h"
#include "MantidKernel/Glob.h"
#include <Poco/Path.h>
#include <Poco/File.h>
#include <Poco/StringTokenizer.h>
#include <Poco/Exception.h>
#include <boost/regex.hpp>
#include <boost/lexical_cast.hpp>
#include <cctype>
#include <algorithm>
#include <boost/algorithm/string.hpp>
namespace
{
  /**
   * Unary predicate for use with remove_if. Checks for the existence of
   * a "*" wild card in the file extension string passed to it.
   *
   * @param ext :: the extension to check.
   *
   * @returns true if extension contains a "*", else false.
   */
  bool containsWildCard(const std::string & ext)
  {
    return ext.find('*') != std::string::npos;
  }
}
Campbell, Stuart
committed
namespace API
Gigg, Martyn Anthony
committed
{
Peterson, Peter
committed
using std::string;
Campbell, Stuart
committed
// The single allowed suffix below could be generalised into a list of allowed suffixes; it is currently used only by the ISIS SANS group
const std::string FileFinderImpl::ALLOWED_SUFFIX = "-add";
//----------------------------------------------------------------------
// Public member functions
//----------------------------------------------------------------------
/**
 * Default constructor. Loads the plugin libraries (when a plugin directory is
 * configured) and initialises the glob option from the
 * "filefinder.casesensitive" configuration key.
 */
FileFinderImpl::FileFinderImpl() : g_log(Mantid::Kernel::Logger::get("FileFinderImpl"))
{
  // Make sure plugins are loaded
  const std::string libpath = Kernel::ConfigService::Instance().getString("plugins.directory");
  if (!libpath.empty())
  {
    Kernel::LibraryManager::Instance().OpenAllLibraries(libpath);
  }

  // determine from Mantid property how sensitive Mantid should be:
  // only an explicit "Off" (any letter case) selects caseless matching
  const std::string casesensitive = Mantid::Kernel::ConfigService::Instance().getString("filefinder.casesensitive");
  if (boost::iequals("Off", casesensitive))
  {
    globOption = Poco::Glob::GLOB_CASELESS;
  }
  else
  {
    globOption = Poco::Glob::GLOB_DEFAULT;
  }
}
/**
 * Choose whether the file finder matches names case sensitively.
 * @param cs :: true selects case-sensitive matching, false selects caseless matching
 */
void FileFinderImpl::setCaseSensitive(const bool cs)
{
  globOption = cs ? Poco::Glob::GLOB_DEFAULT : Poco::Glob::GLOB_CASELESS;
}
/**
 * Report the current case-sensitivity setting of the file finder.
 * @return The stored glob option as an int, NOT a boolean: it holds
 * Poco::Glob::GLOB_DEFAULT when searches are case sensitive and
 * Poco::Glob::GLOB_CASELESS when they are not (the two values assigned by
 * setCaseSensitive). Compare against those constants rather than testing
 * the result for truth.
 */
int FileFinderImpl::getCaseSensitive()
{
return globOption;
}
Campbell, Stuart
committed
/**
* Return the full path to the file given its name
* @param fName :: A full file name (without path) including extension
* @return The full path if the file exists and can be found in one of the search locations
* or an empty string otherwise.
*/
std::string FileFinderImpl::getFullPath(const std::string& fName) const
g_log.debug() << "getFullPath(" << fName << ")\n";
Campbell, Stuart
committed
// If this is already a full path, nothing to do
if (Poco::Path(fName).isAbsolute())
return fName;
// First try the path relative to the current directory. Can throw in some circumstances with extensions that have wild cards
try
{
Poco::File fullPath(Poco::Path().resolve(fName));
if (fullPath.exists())
return fullPath.path();
}
catch (std::exception&)
{
}
const std::vector<std::string>& searchPaths =
Kernel::ConfigService::Instance().getDataSearchDirs();
std::vector<std::string>::const_iterator it = searchPaths.begin();
for (; it != searchPaths.end(); ++it)
// On windows globbing is note working properly with network drives
// for example a network drive containing a $
// For this reason, and since windows is case insensitive anyway
// a special case is made for windows
#ifdef _WIN32
if (fName.find("*") != std::string::npos)
{
#endif
Campbell, Stuart
committed
Poco::Path path(*it, fName);
Poco::Path pathPattern(path);
std::set < std::string > files;
Kernel::Glob::glob(pathPattern, files, globOption);
Campbell, Stuart
committed
if (!files.empty())
{
return *files.begin();
}
#ifdef _WIN32
}
else
{
Poco::Path path(*it, fName);
Poco::File file(path);
if (file.exists())
{
return path.toString();
}
}
#endif
Campbell, Stuart
committed
return "";
Campbell, Stuart
committed
/** Run numbers can be followed by an allowed string. Check if there is
* one, remove it from the name and return the string, else return empty
* @param userString run number that may have a suffix
* @return the suffix, if there was one
*/
std::string FileFinderImpl::extractAllowedSuffix(std::string & userString) const
Campbell, Stuart
committed
if (userString.find(ALLOWED_SUFFIX) == std::string::npos)
{
//short cut processing as normally there is no suffix
return "";
}
// ignore any file extension in checking if a suffix is present
Poco::Path entry(userString);
std::string noExt(entry.getBaseName());
Campbell, Stuart
committed
if (noExt.find(ALLOWED_SUFFIX) == noExt.size() - repNumChars)
{
userString.replace(userString.size() - repNumChars, repNumChars, "");
return ALLOWED_SUFFIX;
}
return "";
Peterson, Peter
committed
/**
 * Return the facility as determined from the hint.
 *
 * @param hint :: The name hint.
 * @return The facility of the instrument named at the start of the hint's file
 *  name. The default facility is returned if the hint is empty, starts with a
 *  digit, or does not name a known instrument.
 */
const Kernel::FacilityInfo FileFinderImpl::getFacility(const string& hint) const
{
  // <cctype> classifiers require a value representable as unsigned char
  if ((!hint.empty()) && (!std::isdigit(static_cast<unsigned char>(hint[0]))))
  {
    // Only the file-name part of the hint can carry the instrument name
    std::string instrName = Poco::Path(hint).getFileName();

    if ((instrName.find("PG3") == 0) || (instrName.find("pg3") == 0))
    {
      // PG3 ends in a digit, so the generic digit scan below would cut it short;
      // handled the same way as in toInstrumentAndNumber
      instrName = "PG3";
    }
    else
    {
      const auto isDigit = [](const char c) { return std::isdigit(static_cast<unsigned char>(c)) != 0; };
      const auto isAlpha = [](const char c) { return std::isalpha(static_cast<unsigned char>(c)) != 0; };

      // go forwards looking for the run number to start
      instrName.erase(std::find_if(instrName.begin(), instrName.end(), isDigit), instrName.end());

      // go backwards looking for the instrument name to end - gets around delimiters
      // (base() of the reverse iterator points one past the last letter)
      instrName.erase(std::find_if(instrName.rbegin(), instrName.rend(), isAlpha).base(), instrName.end());
    }

    try
    {
      const Kernel::InstrumentInfo instrument = Kernel::ConfigService::Instance().getInstrument(instrName);
      return instrument.facility();
    }
    catch (const Kernel::Exception::NotFoundError &e)
    {
      // Unknown instrument: fall back to the default facility below
      g_log.debug() << e.what() << "\n";
    }
  }
  return Kernel::ConfigService::Instance().getFacility();
}
Campbell, Stuart
committed
/**
* Extracts the instrument name and run number from a hint
* @param hint :: The name hint
* @return A pair of instrument name and run number
*/
std::pair<std::string, std::string> FileFinderImpl::toInstrumentAndNumber(const std::string& hint) const
{
std::string instrPart;
std::string runPart;
if (isdigit(hint[0]))
{
Peterson, Peter
committed
instrPart = Kernel::ConfigService::Instance().getInstrument().shortName();
Campbell, Stuart
committed
runPart = hint;
}
else
{
/// Find the last non-digit as the instrument name can contain numbers
std::string::const_reverse_iterator it = std::find_if(hint.rbegin(), hint.rend(),
std::not1(std::ptr_fun(isdigit)));
// No non-digit or all non-digits
if (it == hint.rend() || it == hint.rbegin())
{
throw std::invalid_argument("Malformed hint to FileFinderImpl::makeFileName: " + hint);
}
std::string::size_type nChars = std::distance(it, hint.rend());
Peterson, Peter
committed
// PG3 is a special case (name ends in a number)- don't trust them
if ((hint.find("PG3") == 0) || (hint.find("pg3") == 0)) {
instrPart = "PG3";
if (nChars < 3)
nChars++;
Peterson, Peter
committed
}
else {
instrPart = hint.substr(0, nChars);
Campbell, Stuart
committed
}
runPart = hint.substr(nChars);
Campbell, Stuart
committed
}
unsigned int irunPart = boost::lexical_cast<unsigned int>( runPart );
Peterson, Peter
committed
Kernel::InstrumentInfo instr = Kernel::ConfigService::Instance().getInstrument(instrPart);
size_t nZero = instr.zeroPadding(irunPart);
Campbell, Stuart
committed
// remove any leading zeros in case there are too many of them
std::string::size_type i = runPart.find_first_not_of('0');
runPart.erase(0, i);
while (runPart.size() < nZero)
runPart.insert(0, "0");
if (runPart.size() > nZero && nZero != 0)
{
throw std::invalid_argument("Run number does not match instrument's zero padding");
}
instrPart = instr.filePrefix(irunPart);
Campbell, Stuart
committed
return std::make_pair(instrPart, runPart);
Campbell, Stuart
committed
}
Campbell, Stuart
committed
/**
* Make a data file name (without extension) from a hint. The hint can be either a run number or
* a run number prefixed with an instrument name/short name. If the instrument
* name is absent the default one is used.
* @param hint :: The name hint
* @param facility :: The current facility object
Campbell, Stuart
committed
* @return The file name
* @throw NotFoundError if a required default is not set
* @throw std::invalid_argument if the argument is malformed or run number is too long
*/
Peterson, Peter
committed
std::string FileFinderImpl::makeFileName(const std::string& hint, const Kernel::FacilityInfo& facility) const
Campbell, Stuart
committed
{
if (hint.empty())
return "";
Steve Williams
committed
Campbell, Stuart
committed
std::string filename(hint);
const std::string suffix = extractAllowedSuffix(filename);
std::pair < std::string, std::string > p = toInstrumentAndNumber(filename);
Peterson, Peter
committed
std::string delimiter = facility.delimiter();
Steve Williams
committed
Campbell, Stuart
committed
filename = p.first;
if (!delimiter.empty())
{
filename += delimiter;
}
filename += p.second;
if (!suffix.empty())
Campbell, Stuart
committed
filename += suffix;
Campbell, Stuart
committed
return filename;
Gigg, Martyn Anthony
committed
Campbell, Stuart
committed
/**
 * Find the file given a hint. If the name contains a dot(.) then it is assumed that it is already a file stem
 * otherwise calls makeFileName internally.
 * @param hint :: The name hint, format: [INSTR]1234[.ext]
 * @param exts :: Optional list of allowed extensions; may be a null pointer, which is treated
 *  like an empty list. Only those extensions found in both
 *  facilities extension list and exts will be used in the search. If an extension is given in hint
 *  this argument is ignored.
 * @return The full path to the file or empty string if not found
 */
std::string FileFinderImpl::findRun(const std::string& hint, const std::set<std::string> *exts) const
{
  // exts is optional, so it must not be dereferenced before the null check
  const bool haveExts = (exts != nullptr) && !exts->empty();
  const std::size_t nExts = haveExts ? exts->size() : 0;
  g_log.debug() << "set findRun(\'" << hint << "\', exts[" << nExts << "])\n";

  if (hint.empty())
    return "";

  // Delegate to the vector overload
  std::vector<std::string> exts_v;
  if (haveExts)
    exts_v.assign(exts->begin(), exts->end());
  return this->findRun(hint, exts_v);
}
/**
 * Overload of findRun taking the candidate extensions as a vector.
 *
 * As far as the visible code shows, the search proceeds in this order:
 *  - a hint that carries an extension is looked up with getFullPath() and
 *    returned if that path exists;
 *  - the facility is taken from getFacility(hint) and archive searchers are
 *    created when "datasearch.searcharchive" is set to something other than
 *    "off" and the facility lists archive searches;
 *  - when the hint has no directory part, an extension is split off at the
 *    last '.' and the remainder is passed through makeFileName();
 *  - getPath() is tried with the extension taken from the hint, then with
 *    exts, then with the extensions reported by the facility.
 *
 * NOTE(review): this span looks damaged — braces are unbalanced, a `catch`
 * has no visible `try`, and some statements appear to be missing. Restore it
 * from version control before relying on the exact control flow.
 *
 * @param hint :: The name hint
 * @param exts :: Extensions to try after the one embedded in the hint
 * @return The path that was found, or an empty string
 */
std::string FileFinderImpl::findRun(const std::string& hint,const std::vector<std::string> &exts)const
g_log.debug() << "vector findRun(\'" << hint << "\', exts[" << exts.size() << "])\n";
//if partial filename or run number is not supplied, return here
if (hint.empty())
return "";
// if it looks like a full filename just do a quick search for it
Poco::Path hintPath(hint);
if (!hintPath.getExtension().empty())
{
// check in normal search locations
g_log.debug() << "hintPath is not empty, check in normal search locations" << "\n";
std::string path = getFullPath(hint);
if (Poco::File(path).exists() )
{
g_log.information() << "found path = " << path << '\n';
return path;
}
// NOTE(review): no matching `try` is visible for this handler
} catch (Poco::Exception &) { }
}
else
{
g_log.debug() << "Unable to find files via directory search with the filename that looks like a full filename" << "\n";
// The facility decides the file-name delimiter, archive searchers and default extensions used below
const Kernel::FacilityInfo facility = this->getFacility(hint);
// initialize the archive searcher
std::vector<IArchiveSearch_sptr> archs;
{ // hide in a local namespace so things fall out of scope
std::string archiveOpt = Kernel::ConfigService::Instance().getString("datasearch.searcharchive");
// Lower-case the option so the comparison with "off" ignores letter case
std::transform(archiveOpt.begin(), archiveOpt.end(), archiveOpt.begin(), tolower);
if (!archiveOpt.empty() && archiveOpt != "off" && !facility.archiveSearch().empty())
{
std::vector<std::string>::const_iterator it = facility.archiveSearch().begin();
for (; it != facility.archiveSearch().end(); ++it)
g_log.debug() << "get archive search for the facility..." << *it << "\n";
archs.push_back(ArchiveSearchFactory::Instance().create(*it));
}
}
}
// Do we need to try and form a filename from our preset rules
std::string filename(hint);
std::string extension;
// depth() == 0: the hint has no directory component
if (hintPath.depth() == 0)
{
// Split "stem.ext" at the last dot; extension keeps its leading '.'
std::size_t i = filename.find_last_of('.');
if (i != std::string::npos)
{
extension = filename.substr(i);
filename.erase(i);
}
try
{
filename = makeFileName(filename, facility);
}
// makeFileName rejected the stem as a run-number hint
catch(std::invalid_argument&)
{
if (filename.length() >= hint.length())
{
g_log.error() << "Could not find file '" << filename << "'\n";
}
// Look first at the original filename then for case variations. This is important
// on platforms where file names ARE case sensitive.
std::set<std::string> filenames;
filenames.insert(filename);
// NOTE(review): the upper- and lower-cased variants are produced below but no
// insertion into `filenames` is visible — presumably lost lines; confirm.
if (globOption == Poco::Glob::GLOB_CASELESS)
std::transform(filename.begin(),filename.end(),filename.begin(),toupper);
if (globOption == Poco::Glob::GLOB_CASELESS)
std::transform(filename.begin(),filename.end(),filename.begin(),tolower);
// work through the extensions
// try the extension that comes with the filename
if (!extension.empty())
{
g_log.debug() << "Attempt to find files with the extension that comes with the filename " << extension << "\n";
std::string path = getPath(archs, filenames, std::vector<std::string>(1, extension));
if (!path.empty())
{
g_log.information() << "found path = " << path << '\n';
return path;
} else {
g_log.information() << "Unable to find files with extensions that comes with the filename" << "\n";
// try the extension that are supplied by user
if (!exts.empty())
g_log.debug() << "Attempt to find files with extensions that are supplied by users or algorithms, first extension = " << *(exts.begin()) << "\n";
std::string path = getPath(archs, filenames, exts);
if (!path.empty())
g_log.information() << "found path = " << path << '\n';
return path;
} else {
g_log.information() << "Unable to find files with extensions that are supplied by users or algorithms" << "\n";
// work through the extensions
const std::vector<std::string> facility_extensions = facility.extensions();
// select allowed extensions
std::vector < std::string > extensions;
g_log.debug() << "Add facility extensions defined in the Facility.xml file" << "\n";
extensions.assign(facility_extensions.begin(), facility_extensions.end());
std::string path = getPath(archs, filenames, extensions);
if (!path.empty())
g_log.information() << "found path = " << path << '\n';
return path;
} else {
g_log.information() << "Unable to find files with extensions that are defined in the Facilities.xml file" << "\n";
// Every strategy failed
g_log.information() << "Unable to find file path for " << hint << "\n";
return "";
}
Campbell, Stuart
committed
/**
* Find a list of files file given a hint. Calls findRun internally.
* @param hint :: Comma separated list of hints to findRun method.
* Can also include ranges of runs, e.g. 123-135 or equivalently 123-35.
* Only the beginning of a range can contain an instrument name.
* @return A vector of full paths or empty vector
* @throw std::invalid_argument if the argument is malformed
* @throw Exception::NotFoundError if a file could not be found
Campbell, Stuart
committed
*/
std::vector<std::string> FileFinderImpl::findRuns(const std::string& hint) const
g_log.debug() << "findRuns hint = " << hint << "\n";
Campbell, Stuart
committed
std::vector < std::string > res;
Poco::StringTokenizer hints(hint, ",",
Poco::StringTokenizer::TOK_TRIM | Poco::StringTokenizer::TOK_IGNORE_EMPTY);
Poco::StringTokenizer::Iterator h = hints.begin();
Campbell, Stuart
committed
for (; h != hints.end(); ++h)
Campbell, Stuart
committed
// Quick check for a filename
bool fileSuspected = false;
// Assume if the hint contains either a "/" or "\" it is a filename..
if ((*h).find("\\") != std::string::npos)
Campbell, Stuart
committed
fileSuspected = true;
}
if ((*h).find("/") != std::string::npos)
{
fileSuspected = true;
if ((*h).find(ALLOWED_SUFFIX) != std::string::npos)
{
fileSuspected = true;
}
Campbell, Stuart
committed
Poco::StringTokenizer range(*h, "-",
Poco::StringTokenizer::TOK_TRIM | Poco::StringTokenizer::TOK_IGNORE_EMPTY);
if ((range.count() > 2) && (!fileSuspected))
{
throw std::invalid_argument("Malformed range of runs: " + *h);
}
else if ((range.count() == 2) && (!fileSuspected))
{
std::pair < std::string, std::string > p1 = toInstrumentAndNumber(range[0]);
std::string run = p1.second;
size_t nZero = run.size(); // zero padding
if (range[1].size() > nZero)
{
throw std::invalid_argument("Malformed range of runs: " + *h
Campbell, Stuart
committed
+ ". The end of string value is longer than the instrument's zero padding");
}
int runNumber = boost::lexical_cast<int>(run);
std::string runEnd = run;
// Adds zero padding to end of range.
Campbell, Stuart
committed
runEnd.replace(runEnd.end() - range[1].size(), runEnd.end(), range[1]);
// Throw if runEnd contains something else other than a digit.
boost::regex digits("[0-9]+");
if (!boost::regex_match(runEnd, digits))
throw std::invalid_argument("Malformed range of runs: Part of the run has a non-digit character in it.");
Campbell, Stuart
committed
int runEndNumber = boost::lexical_cast<int>(runEnd);
if (runEndNumber < runNumber)
{
throw std::invalid_argument("Malformed range of runs: " + *h);
}
for (int irun = runNumber; irun <= runEndNumber; ++irun)
{
run = boost::lexical_cast<std::string>(irun);
while (run.size() < nZero)
run.insert(0, "0");
std::string path = findRun(p1.first + run);
Campbell, Stuart
committed
if (!path.empty())
{
res.push_back(path);
}
else
{
throw Kernel::Exception::NotFoundError("Unable to find file:", run);
}
Campbell, Stuart
committed
}
}
else
{
std::string path = findRun(*h);
Campbell, Stuart
committed
if (!path.empty())
{
res.push_back(path);
}
else
{
throw Kernel::Exception::NotFoundError("Unable to find file:", *h);
}
Campbell, Stuart
committed
}
Campbell, Stuart
committed
return res;
}
/**
* Return the path to the file found in archive
* @param archs :: A full file name (without path) including extension
* @param fName :: A full file name (without path) including extension
* @return The full path if the file exists and can be found in one of the search locations
* or an empty string otherwise.
*/
std::string FileFinderImpl::getArchivePath(const std::vector<IArchiveSearch_sptr>& archs, const std::string& fName) const
g_log.debug() << "getArchivePath(" << fName << ")\n";
std::string path = "";
std::vector<IArchiveSearch_sptr>::const_iterator it = archs.begin();
for (; it != archs.end(); ++it)
path = (*it)->getPath(fName);
if (!path.empty())
{
return path;
}
catch (...){ }
/**
* Return the full path to the file given its name
* @param fName :: A vector of full file name (without path) and a vector of extensions
* @return The full path if the file exists and can be found in one of the search locations
* or an empty string otherwise.
*/
std::string FileFinderImpl::getPath(const std::vector<IArchiveSearch_sptr>& archs, const std::set<std::string>& filenames, const std::vector<std::string>& exts) const
{
std::string path;
std::vector<std::string> extensions;
extensions.assign(exts.begin(),exts.end());
// Remove wild cards.
extensions.begin(), extensions.end(),
containsWildCard),
extensions.end());
const std::vector<std::string> & searchPaths =
Kernel::ConfigService::Instance().getDataSearchDirs();
// Before we try any globbing, make sure we exhaust all reasonable attempts at constructing the possible filename.
// Avoiding the globbing of getFullPath() for as long as possible will help performance when calling findRuns()
// with a large range of files, especially when searchPaths consists of folders containing a large number of runs.
for(auto ext = extensions.begin(); ext != extensions.end(); ++ext)
{
for(auto filename = filenames.begin(); filename != filenames.end(); ++filename)
{
for(auto searchPath = searchPaths.begin(); searchPath != searchPaths.end(); ++searchPath)
{
try
{
Poco::Path path(*searchPath, *filename + *ext);
Poco::File file(path);
if (file.exists())
return path.toString();
} catch (Poco::Exception &) { /* File does not exist, just carry on. */ }
for (auto ext = extensions.begin(); ext != extensions.end(); ++ext)
{
std::set<std::string>::const_iterator it = filenames.begin();
for(; it!=filenames.end(); ++it)
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
{
path = getFullPath(*it + *ext);
try
{
if (!path.empty() && Poco::File(path).exists())
{
g_log.debug() << "path returned from getFullPath() = " << path << '\n';
return path;
}
}
catch(std::exception& e)
{
g_log.error() << "Cannot open file " << path << ": " << e.what() << '\n';
return "";
}
}
}
// Search the archive
if (archs.size() != 0 )
{
g_log.debug() << "Search the archive of the default facility" << "\n";
std::string path = "";
std::vector<std::string>::const_iterator ext = extensions.begin();
for (; ext != extensions.end(); ++ext)
{
std::set<std::string>::const_iterator it = filenames.begin();
for(; it!=filenames.end(); ++it)
{
path = getArchivePath(archs, *it + *ext);
try
{
if (!path.empty() && Poco::File(path).exists())
{
return path;
}
}
catch(std::exception& e)
{
g_log.error() << "Cannot open file " << path << ": " << e.what() << '\n';
return "";
}
} // it
} // ext
} // archs
return "";
}