-
Steve Williams authored
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
LoadLog.cpp 23.22 KiB
//----------------------------------------------------------------------
// Includes
//----------------------------------------------------------------------
#include "LoadRaw/isisraw2.h"
#include "MantidAPI/FileProperty.h"
#include "MantidDataHandling/LoadLog.h"
#include "MantidDataObjects/Workspace2D.h"
#include "MantidKernel/ArrayProperty.h"
#include "MantidKernel/Glob.h"
#include "MantidKernel/LogParser.h"
#include "MantidKernel/Strings.h"
#include "MantidKernel/PropertyWithValue.h"
#include "MantidKernel/TimeSeriesProperty.h"
#include <boost/algorithm/string.hpp>
#include <Poco/File.h>
#include <Poco/Path.h>
#include <Poco/DirectoryIterator.h>
#include <Poco/DateTimeParser.h>
#include <Poco/DateTimeFormat.h>
#include <Poco/RegularExpression.h>
#include <fstream> // used to get ifstream
#include <sstream>
namespace Mantid
{
namespace DataHandling
{
// Register the algorithm into the algorithm factory
DECLARE_ALGORITHM(LoadLog)
/// Sets documentation strings for this algorithm
void LoadLog::initDocs()
{
this->setWikiSummary("Load ISIS log file(s) into a [[workspace]]. ");
this->setOptionalMessage("Load ISIS log file(s) into a workspace.");
}
using namespace Kernel;
using API::WorkspaceProperty;
using API::MatrixWorkspace;
using API::MatrixWorkspace_sptr;
using API::FileProperty;
using DataObjects::Workspace2D;
using DataObjects::Workspace2D_sptr;
/// Default constructor; the body performs no work.
LoadLog::LoadLog() {}
/// Declares the algorithm's properties: Workspace, Filename, Names and Units.
void LoadLog::init()
{
  // The workspace name is not used when running as a sub-algorithm, so the
  // "Anonymous" default is there only to satisfy the validator.
  declareProperty(
    new WorkspaceProperty<MatrixWorkspace>("Workspace", "Anonymous", Direction::InOut),
    "The name of the workspace to which the log data will be added");

  // File extensions passed to the Filename property.
  std::vector<std::string> exts;
  exts.push_back(".txt");
  exts.push_back(".raw");
  exts.push_back(".s*");
  exts.push_back(".add");
  declareProperty(
    new FileProperty("Filename", "", FileProperty::Load, exts),
    "The filename (including its full or relative path) of either \n"
    "an ISIS log file, a multi-column SNS-style text file, or an ISIS raw file. \n"
    "If a raw file is specified all log files associated with\n"
    "that raw file are loaded into the specified workspace. The file extension must\n"
    "either be .raw or .s when specifying a raw file");

  // The two properties below are only read by the SNS-style text loader.
  declareProperty(
    new ArrayProperty<std::string>("Names"),
    "For SNS-style log files only: the names of each column's log, separated by commas.\n"
    "This must be one fewer than the number of columns in the file.");
  declareProperty(
    new ArrayProperty<std::string>("Units"),
    "For SNS-style log files only: the units of each column's log, separated by commas.\n"
    "This must be one fewer than the number of columns in the file.\n"
    "Optional: leave blank for no units in any log.");
}
//@cond NODOC
namespace
{
  /// Unary predicate that tests a string against a stored regular expression,
  /// ignoring case. Intended for use with standard algorithms such as find_if.
  class FileMatcher
  {
  public:
    /// Store the pattern text that every tested string is matched against.
    FileMatcher(const std::string & expression) : m_expression(expression) {}

    /// @return the result of Poco::RegularExpression::match for the given string
    bool operator()(const std::string & test) const
    {
      // The regular expression is built per call from the stored pattern text;
      // only the pattern string is held as a member.
      return Poco::RegularExpression(m_expression, Poco::RegularExpression::RE_CASELESS).match(test);
    }

  private:
    /// Declared but not defined: a matcher always needs a pattern.
    FileMatcher();
    /// The regular expression pattern
    const std::string m_expression;
  };
}
//@endcond
/** Check if the file named by m_filename is an SNS-style text log; load it if it is.
 *
 * A file is treated as SNS style when every whitespace-separated column of its
 * first line converts to a double and there are at least two columns. The first
 * column is used as the time of each entry; every further column becomes one
 * TimeSeriesProperty<double> named after the matching entry of the "Names" property.
 *
 * @return true if the file was SNS style and its logs were added to the workspace;
 *         false if the first line could not be read or is not SNS style.
 * @throw std::invalid_argument if the number of Names (or of non-empty Units) does not
 *        equal the number of columns minus one.
 * @throw std::runtime_error if a later line cannot be converted or has a different
 *        number of columns than the first line.
 */
bool LoadLog::LoadSNSText()
{
  // Get the SNS-specific parameters
  std::vector<std::string> names = getProperty("Names");
  std::vector<std::string> units = getProperty("Units");

  // The logs are added to the run object of this workspace
  const MatrixWorkspace_sptr localWorkspace = getProperty("Workspace");

  // Open the log file and use its first line to decide on the format
  std::ifstream inLogFile(m_filename.c_str());
  std::string aLine;
  if (!Mantid::Kernel::extractToEOL(inLogFile,aLine))
    return false;

  std::vector<double> cols;
  if (!SNSTextFormatColumns(aLine, cols) || cols.size() < 2)
    return false;

  // First column is the time, the remaining ones are log values
  const size_t numCols = cols.size()-1;
  if (names.size() != numCols)
    throw std::invalid_argument("The Names parameter should have one fewer entry as the number of columns in a SNS-style text log file.");
  if ((units.size() > 0) && (units.size() != numCols))
    throw std::invalid_argument("The Units parameter should have either 0 entries or one fewer entry as the number of columns in a SNS-style text log file.");

  std::vector<TimeSeriesProperty<double>*> props;
  try
  {
    // Create one log per value column
    for(size_t i=0; i < numCols; i++)
    {
      TimeSeriesProperty<double>* p = new TimeSeriesProperty<double>(names[i]);
      // Register the pointer before touching it further so the handler below can free it
      props.push_back(p);
      if (units.size() == numCols)
        p->setUnits(units[i]);
    }

    // Go back to the start of the file. The state is cleared first because an
    // end-of-file flag left by the first read is not reset by seekg on
    // pre-C++11 standard libraries and would make every following read fail.
    inLogFile.clear();
    inLogFile.seekg(0);

    while(Mantid::Kernel::extractToEOL(inLogFile,aLine))
    {
      // An empty line ends the data
      if (aLine.size() == 0)
        break;
      if (!SNSTextFormatColumns(aLine, cols))
        throw std::runtime_error("Error while reading columns in SNS-style text file.");
      if (cols.size() != numCols+1)
        throw std::runtime_error("Inconsistent number of columns while reading SNS-style text file.");

      DateAndTime time(cols[0], 0.0);
      for(size_t i=0; i<numCols; i++)
        props[i]->addValue(time, cols[i+1]);
    }
  }
  catch(...)
  {
    // None of the logs has been handed to the workspace yet, so they are still
    // owned here and must be released before the error is propagated.
    for(size_t i=0; i < props.size(); i++)
      delete props[i];
    throw;
  }

  // Now add all the full logs to the workspace, replacing any log of the same name
  for(size_t i=0; i < numCols; i++)
  {
    std::string name = props[i]->name();
    if (localWorkspace->mutableRun().hasProperty(name))
    {
      localWorkspace->mutableRun().removeLogData(name);
      g_log.information() << "Log data named " << name << " already existed and was overwritten.\n";
    }
    localWorkspace->mutableRun().addLogData(props[i]);
  }
  return true;
}
/** Executes the algorithm: reads log file(s) into the run object of the workspace.
 *
 * The Filename property is handled in one of three ways:
 *  - an ASCII file accepted by LoadSNSText() is loaded as an SNS-style text log;
 *  - any other ASCII file whose name contains '_' is treated as a single log file;
 *  - everything else is treated as a raw file, and the associated log files are
 *    looked up (alternate data stream, then "<raw name>_*.txt" in the same
 *    directory) together with an optional three-column "<raw name>.log" file.
 *
 * @throw Mantid::Kernel::Exception::FileError Thrown if Filename is a directory
 * @throw std::invalid_argument Propagated from LoadSNSText() for inconsistent Names/Units
 * @throw std::runtime_error Propagated from LoadSNSText() for malformed SNS-style files
 */
void LoadLog::exec()
{
  // Retrieve the filename from the properties and perform some initial checks on the filename
  m_filename = getPropertyValue("Filename");
  // File property checks whether the given path exists, just check that is actually a file
  Poco::File l_path( m_filename );
  if ( l_path.isDirectory() )
  {
    g_log.error("In LoadLog: " + m_filename + " must be a filename not a directory.");
    throw Exception::FileError("Filename is a directory:" , m_filename);
  }

  // Get the input workspace; the log file(s) will be loaded into its run object
  const MatrixWorkspace_sptr localWorkspace = getProperty("Workspace");

  // Names of the files that may hold log data. Filled from m_filename itself when
  // it looks like a log file, otherwise from the files found next to the raw file.
  std::set<std::string> potentialLogFiles;

  std::string l_filenamePart = Poco::Path(l_path.path()).getFileName();// get filename part only
  bool rawFile = false;// Will be true if Filename property is a name of a RAW file

  // Inspect the file content once and reuse the answer for both decisions below
  const bool asciiFile = isAscii(m_filename);
  if ( asciiFile )
  {
    // Is it a SNS style file? If so, we load it and stop here.
    if (LoadSNSText())
      return;
    // Otherwise we continue.
  }

  if ( asciiFile && l_filenamePart.find("_") != std::string::npos )
  {
    // then we will assume that m_filename is an ISIS/SNS log file
    potentialLogFiles.insert(m_filename);
  }
  else
  {
    // then we will assume that m_filename is an ISIS raw file. The file validator will have warned the user if the
    // extension is not one of the suggested ones
    rawFile = true;

    // strip the extension to obtain the raw data file identifier
    std::string l_rawID(l_filenamePart);
    const std::string::size_type idx = l_filenamePart.rfind('.');
    if( idx != std::string::npos )
    {
      l_rawID = l_filenamePart.substr(0, idx);
    }

    // If an alternate data stream exists for the raw file, read the log file names from it
    if(adsExists())
    {
      potentialLogFiles = getLogfilenamesfromADS();
    }
    else
    {
      // look for log files in the directory of the raw datafile
      std::string pattern(l_rawID + "_*.txt");
      Poco::Path dir(m_filename);
      dir.makeParent();
      try
      {
        Kernel::Glob::glob(Poco::Path(dir).resolve(pattern),potentialLogFiles);
      }
      catch(std::exception &)
      {
        // Deliberately ignored: the directory scan below is the fallback
      }

      if( potentialLogFiles.empty() )
      {
        // Fallback: case-insensitive match over every file in the raw file's directory
        Poco::RegularExpression regex(l_rawID + "_.*\\.txt", Poco::RegularExpression::RE_CASELESS );
        Poco::DirectoryIterator end_iter;
        for ( Poco::DirectoryIterator dir_itr(Poco::Path(m_filename).parent()); dir_itr != end_iter; ++dir_itr )
        {
          if ( !Poco::File(dir_itr->path() ).isFile() ) continue;
          l_filenamePart = Poco::Path(dir_itr->path()).getFileName();
          if ( regex.match(l_filenamePart) )
          {
            potentialLogFiles.insert( dir_itr->path() );
          }
        }
      }
    }

    // If a three-column .log file exists next to the raw file, load it and drop
    // from the list every .txt file whose block is already covered by it
    std::string threecolumnLogfile = getThreeColumnName();
    if( !threecolumnLogfile.empty() )
    {
      std::set<std::string> blockFileNameList=createthreecolumnFileLogProperty(threecolumnLogfile,localWorkspace->mutableRun());
      std::set<std::string>::const_iterator itr;
      for(itr=blockFileNameList.begin();itr!=blockFileNameList.end();++itr)
      {
        // Erase by key: a no-op when the name is not in the set
        potentialLogFiles.erase(*itr);
      }
    }
  }

  //If there are no log files by now, we have nothing else to do
  if( potentialLogFiles.empty() ) return;

  //Do a quick search for the icpevent file
  std::string icpevent_file_name("");
  std::set<std::string>::const_iterator icpfile = find_if(potentialLogFiles.begin(), potentialLogFiles.end(), FileMatcher(std::string(".*icpevent.*")));
  if( icpfile != potentialLogFiles.end() )
  {
    icpevent_file_name = *icpfile;
  }

  Kernel::LogParser parser(icpevent_file_name);
  // Add mantid-created logs
  m_periods=parser.getPeriodsProperty();
  localWorkspace->mutableRun().addLogData(parser.createAllPeriodsLog());
  localWorkspace->mutableRun().addLogData(parser.createRunningLog());

  // Extract the common part of log file names (the workspace name).
  // The extension is only removed when there is one: erase(npos) would throw
  // std::out_of_range for a file name without a '.'.
  std::string ws_name = Poco::Path(m_filename).getFileName();
  const std::string::size_type extPos = ws_name.find_last_of('.');
  if( extPos != std::string::npos )
  {
    ws_name.erase(extPos);
  }
  ws_name += '_';
  size_t n_common_chars = ws_name.size();

  // Attempt to load the content of each potential log file into the run object
  std::set<std::string>::const_iterator logs_end = potentialLogFiles.end();
  for(std::set<std::string>::const_iterator logs_itr = potentialLogFiles.begin(); logs_itr != logs_end; ++logs_itr)
  {
    std::string filename = *logs_itr;
    // open log file
    std::ifstream inLogFile(filename.c_str());
    if (!inLogFile)
    {
      // Unable to open file...
      g_log.warning("Unable to open file " + filename);
      // ...go on to the next one
      continue;
    }

    // Use the first line to validate the file: it must start with a date-time
    // string followed by a column that classifies as a string or a number
    std::string aLine;
    if( Mantid::Kernel::extractToEOL(inLogFile,aLine) )
    {
      if ( !isDateTimeString(aLine) )
      {
        g_log.warning("File" + filename + " is not a standard ISIS log file. Expected to be a two column file.");
        inLogFile.close();
        continue;
      }

      // Skip over the date-time column, then read what follows it
      std::string dateTimeColumn;
      std::stringstream ins(aLine);
      ins >> dateTimeColumn;
      std::string whatType;
      ins >> whatType;
      kind l_kind = classify(whatType);
      if ( LoadLog::string != l_kind && LoadLog::number != l_kind )
      {
        g_log.warning("ISIS log file contains unrecognised second column entries: " + filename);
        inLogFile.close();
        continue;
      }

      try
      {
        // Make the property name by removing the workspace name and file extension from the log filename
        std::string log_name = Poco::Path(Poco::Path(filename).getFileName()).getBaseName();
        if (rawFile)
        {
          log_name.erase(0, n_common_chars);
        }
        Property* log = parser.createLogProperty(*logs_itr,stringToLower(log_name));
        if (log)
        {
          localWorkspace->mutableRun().addLogData(log);
        }
      }
      catch(std::exception&)
      {
        // A file that cannot be turned into a log is skipped; the others are still loaded
        continue;
      }
    }
    inLogFile.close();
  } // end for

  // operation was a success and ended normally
  return;
}
/** Return the name of the three column log file if we have one.
* @returns A string containing the full log file path to a three column log file if one exists. An empty string otherwise.
*/
std::string LoadLog::getThreeColumnName() const
{
std::string rawID;
std::string::size_type dot = m_filename.rfind(".");
if( dot != std::string::npos)
{
rawID = m_filename.substr(0, dot);
}
// append .log to get the .log file name
std::string logfileName=rawID+".log";
int count=0;
if (Poco::File(logfileName).exists())
{
//validate the file
std::ifstream inLogFile(logfileName.c_str());
if (!inLogFile)
{
throw Exception::FileError("Unable to open file:" ,logfileName );
}
//check if first 19 characters of a string is data-time string according to yyyy-mm-ddThh:mm:ss
std::string aLine;
kind l_kind(LoadLog::empty);
while(Mantid::Kernel::extractToEOL(inLogFile,aLine))
{
if ( !isDateTimeString(aLine) )
{ g_log.warning("File" + logfileName + " is not a standard ISIS log file. Expected to be a file starting with DateTime String format.");
inLogFile.close();
return "";
}
std::stringstream ins(aLine);
std::string firstcolumn;
ins >> firstcolumn;
// read in what follows the date-time string in the log file and figure out
// what type it is
std::string secondcolumn;
ins >> secondcolumn;
l_kind = classify(secondcolumn);
if ( LoadLog::string != l_kind )
{
g_log.warning("ISIS log file contains unrecognised second column entries: " + logfileName);
inLogFile.close();
return "";
}
std::string thirdcolumn;
ins>>thirdcolumn;
l_kind = classify(thirdcolumn);
if ( LoadLog::string != l_kind && LoadLog::number!=l_kind)
{
g_log.warning("ISIS log file contains unrecognised third column entries: " + logfileName);
inLogFile.close();
return "";
}
++count;
if(count==2) ///reading first two lines from file for validation purpose.
break;
}
return logfileName;
}
else return "";
}
/** Checks whether the "checksum" alternate data stream of m_filename can be opened.
 * @return True if a stream named "<m_filename>:checksum" opened successfully
 */
bool LoadLog::adsExists()
{
  const std::string adsname = m_filename + ":checksum";
  std::ifstream adstream(adsname.c_str());
  return !adstream.fail();
}
/** Reads the "checksum" alternate data stream associated with the raw file
 * (m_filename) and returns the names of the log files listed in it.
 *
 * Each line of the stream is expected to contain a '*' followed by a file name;
 * lines without a '*' and names that do not contain "txt" are skipped. The
 * directory part of m_filename is put in front of every name.
 *
 * @return set of log file names; empty if the stream cannot be opened.
 */
std::set<std::string> LoadLog::getLogfilenamesfromADS()
{
  std::set<std::string> logfilesList;

  const std::string adsname(m_filename+":checksum");
  std::ifstream adstream(adsname.c_str());
  if(!adstream)
    return logfilesList;

  // Directory part of the raw file name: everything before the last '/',
  // or before the last '\' when there is no '/'.
  std::string::size_type sep = m_filename.find_last_of("/");
  if(sep==std::string::npos)
  {
    sep = m_filename.find_last_of("\\");
  }
  const bool hasDirectory = (sep!=std::string::npos);
  const std::string path = hasDirectory ? m_filename.substr(0,sep) : std::string();

  std::string str;
  while(Mantid::Kernel::extractToEOL(adstream,str))
  {
    // The file name is whatever follows the first '*'
    const std::string::size_type star = str.find("*");
    if(star==std::string::npos)
      continue;
    const std::string fileName = str.substr(star+1);

    // Only names containing "txt" are taken to be log files
    if(fileName.find("txt")==std::string::npos)
      continue;

    // Without a directory part the bare name is used; prefixing "/" would
    // wrongly point at the file system root.
    logfilesList.insert(hasDirectory ? path+"/"+fileName : fileName);
  }
  return logfilesList;
}
/** This method reads the.log file and creates timeseries property and sets that to the run object
* @param logfile :: three column log(.log) file name.
* @param run :: The run information object
* @returns list of logfiles which exists as blockname in the .log file
*/
std::set<std::string> LoadLog::createthreecolumnFileLogProperty(const std::string& logfile,API::Run& run)
{
std::set<std::string> blockFileNameList;
std::string sdata,str;
std::string propname;
Mantid::Kernel::TimeSeriesProperty<double>* logd=0;
Mantid::Kernel::TimeSeriesProperty<std::string>* logs=0;
std::map<std::string,Kernel::TimeSeriesProperty<double>*> dMap;
std::map<std::string,Kernel::TimeSeriesProperty<std::string>*> sMap;
typedef std::pair<std::string,Kernel::TimeSeriesProperty<double>* > dpair;
typedef std::pair<std::string,Kernel::TimeSeriesProperty<std::string>* > spair;
std::string path = m_filename;
std::string::size_type pos=m_filename.rfind(".");
if( pos != std::string::npos )
{
path = path.substr(0, pos);
}
bool isNumeric(false);
std::ifstream file(logfile.c_str());
if (!file)
{
g_log.warning()<<"Cannot open log file "<<logfile<<"\n";
return std::set<std::string>();
}
while(Mantid::Kernel::extractToEOL(file,str))
{
if (!Kernel::TimeSeriesProperty<double>::isTimeString(str) || (str[0]=='#'))
{ //if the line doesn't start with a time read the next line
continue;
}
std::stringstream line(str);
std::string timecolumn;
line>>timecolumn;
std::string blockcolumn;
line>>blockcolumn;
std::string valuecolumn;
line>>valuecolumn;
sdata=valuecolumn;
/////column two in .log file is called block column
/////if any .txt file with rawfilename_blockcolumn.txt exists
///// donot load that txt files
///// blockFileNameList conatins the file names to be removed from potentiallogfiles list.
propname=stringToLower(blockcolumn);
//check if the data is numeric
std::istringstream istr(valuecolumn);
double dvalue;
istr >> dvalue;
isNumeric = !istr.fail();
if (isNumeric)
{
std::map<std::string,Kernel::TimeSeriesProperty<double>*>::iterator ditr=dMap.find(propname);
if(ditr!=dMap.end())
{
Kernel::TimeSeriesProperty<double>* p=ditr->second;
if (p) p->addValue(timecolumn,dvalue);
}
else
{
logd = new Kernel::TimeSeriesProperty<double>(propname);
logd->addValue(timecolumn,dvalue);
dMap.insert(dpair(propname,logd));
std::string blockcolumnFileName=path+"_"+blockcolumn+".txt";
if(blockcolumnFileExists(blockcolumnFileName))
{
blockFileNameList.insert(blockcolumnFileName);
}
}
}
else
{
std::map<std::string,Kernel::TimeSeriesProperty<std::string>*>::iterator sitr=sMap.find(propname);
if(sitr!=sMap.end())
{
Kernel::TimeSeriesProperty<std::string>* prop=sitr->second;
if (prop) prop->addValue(timecolumn,valuecolumn);
}
else
{
logs = new Kernel::TimeSeriesProperty<std::string>(propname);
logs->addValue(timecolumn,valuecolumn);
sMap.insert(spair(propname,logs));
}
std::string blockcolumnFileName=path+"_"+blockcolumn+".txt";
if(blockcolumnFileExists(blockcolumnFileName))
{
blockFileNameList.insert(blockcolumnFileName);
}
}
}
try
{
std::map<std::string,Kernel::TimeSeriesProperty<double>*>::const_iterator itr=dMap.begin();
for(;itr!=dMap.end();++itr)
{
run.addLogData(itr->second);
}
std::map<std::string,Kernel::TimeSeriesProperty<std::string>*>::const_iterator sitr=sMap.begin();
for(;sitr!=sMap.end();++sitr)
{
run.addLogData(sitr->second);
}
}
catch(std::invalid_argument &e)
{
g_log.warning()<<e.what();
}
catch(Exception::ExistsError&e)
{
g_log.warning()<<e.what();
}
return blockFileNameList;
}
/** Checks whether a file with the given name exists.
 * Used to look for the .txt file named after a block (second) column entry.
 * @param fileName :: name of the file
 * @return True if the file exists
 */
bool LoadLog::blockcolumnFileExists(const std::string& fileName)
{
  return Poco::File(fileName).exists();
}
/** Takes as input a string and tries to determine what type it is.
 * An empty input is classified as empty, an input containing at least one
 * ASCII letter or underscore as a string, and anything else as a number.
 * @param s :: The string to be classified
 * @return An enum kind which tells what type the string is
 */
LoadLog::kind LoadLog::classify(const std::string& s) const
{
  if( s.empty() )
  {
    return LoadLog::empty;
  }

  // Any one of these characters marks the input as a string
  const std::string stringMarkers("abcdefghijklmnopqrstuvwxyz"
                                  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                                  "_");
  const bool hasMarker = (s.find_first_of(stringMarkers) != std::string::npos);
  return hasMarker ? LoadLog::string : LoadLog::number;
}
/** Change each character of the string to lower case.
 * @param strToConvert :: The input string (taken by value and converted in place)
 * @returns The string but with all characters in lower case
 */
std::string LoadLog::stringToLower(std::string strToConvert)
{
  for (std::string::iterator ch = strToConvert.begin(); ch != strToConvert.end(); ++ch)
  {
    // tolower is only defined for values representable as unsigned char (or EOF);
    // a plain char can be negative for non-ASCII bytes, so convert it first.
    *ch = static_cast<char>(tolower(static_cast<unsigned char>(*ch)));
  }
  return strToConvert;
}
/** Checks whether the start of a file contains only 7-bit ASCII characters.
 * Only the first 256 bytes (or fewer, for a shorter file) are inspected; the file
 * is called binary as soon as one of them has a value above 0x7F.
 * @param filename :: The filename to inspect
 * @returns true if no non-ASCII byte was found in the inspected part of the file;
 *          false if one was found or if the file could not be opened
 */
bool LoadLog::isAscii(const std::string& filename)
{
  FILE* file = fopen(filename.c_str(), "rb");
  if (!file)
  {
    // Nothing can be read, so the file cannot be confirmed to be text.
    // Passing a null FILE* on to fread/fclose would be undefined behaviour.
    return false;
  }

  char data[256];
  const size_t n = fread(data, 1, sizeof(data), file);
  fclose(file);

  for (size_t i = 0; i < n; ++i)
  {
    // Compare as unsigned so bytes 0x80-0xFF are detected whether or not char is signed
    if (static_cast<unsigned char>(data[i]) > 0x7F)
    {
      return false;
    }
  }
  return true;
}
/** Check if the first 19 characters of a string form a date-time string
 * according to yyyy-mm-ddThh:mm:ss
 * @param str :: The string to test
 * @returns the result of DateAndTime::string_isISO8601 for those leading characters
 */
bool LoadLog::isDateTimeString(const std::string& str) const
{
  // Length of "yyyy-mm-ddThh:mm:ss"
  const std::string::size_type timestampLength = 19;
  const std::string candidate = str.substr(0, timestampLength);
  return DateAndTime::string_isISO8601(candidate);
}
/** Read a line of a SNS-style text file.
 *
 * The line is split at every tab or space character and each resulting piece is
 * converted to a double.
 *
 * @param str :: The string to test
 * @param out :: a vector that will be filled with the double values. It is cleared
 *               first and, on failure, holds the values converted before the failure.
 * @return false if the format is NOT SNS style or a conversion failed.
 */
bool LoadLog::SNSTextFormatColumns(const std::string& str, std::vector<double> & out) const
{
  out.clear();

  std::vector<std::string> tokens;
  boost::split(tokens, str, boost::is_any_of("\t "));

  // Every column must evaluate to a double
  for (std::vector<std::string>::iterator token = tokens.begin(); token != tokens.end(); ++token)
  {
    double value = 0.0;
    if (!Strings::convert<double>(*token,value))
    {
      return false;
    }
    out.push_back(value);
  }

  // Nothing failed = it is that format.
  return true;
}
} // namespace DataHandling
} // namespace Mantid