// LoadLog.cpp
// NOTE: repository web-view residue ("Campbell, Stuart authored", the
// code-owners notice and the "17.67 KiB" size label) preceded the source
// here; it is kept only as this comment so the file stays valid C++.
//----------------------------------------------------------------------
// Includes
//----------------------------------------------------------------------
#include "LoadRaw/isisraw2.h"
#include "MantidAPI/FileProperty.h"
#include "MantidDataHandling/LoadLog.h"
#include "MantidDataObjects/Workspace2D.h"
#include "MantidKernel/ArrayProperty.h"
#include "MantidKernel/Glob.h"
#include "MantidKernel/LogParser.h"
#include "MantidKernel/Strings.h"
#include "MantidKernel/PropertyWithValue.h"
#include "MantidKernel/TimeSeriesProperty.h"
#include <boost/regex.hpp>
#include <boost/algorithm/string.hpp>
#include <Poco/File.h>
#include <Poco/Path.h>
#include <Poco/DirectoryIterator.h>
#include <Poco/DateTimeParser.h>
#include <Poco/DateTimeFormat.h>
#include <fstream> // used to get ifstream
#include <sstream>
namespace Mantid {
namespace DataHandling {
// Register the algorithm into the algorithm factory
DECLARE_ALGORITHM(LoadLog)
using namespace Kernel;
using API::WorkspaceProperty;
using API::MatrixWorkspace;
using API::MatrixWorkspace_sptr;
using API::FileProperty;
using DataObjects::Workspace2D;
using DataObjects::Workspace2D_sptr;
/// Default constructor. The body is empty; no work is done here.
LoadLog::LoadLog() {}
/// Declares the algorithm's properties: Workspace, Filename, Names, Units and
/// NumberOfColumns.
void LoadLog::init() {
  // Target workspace. When run as a Child Algorithm the workspace name is not
  // used, so "Anonymous" is passed purely to satisfy the validator.
  declareProperty(
      new WorkspaceProperty<MatrixWorkspace>("Workspace", "Anonymous",
                                             Direction::InOut),
      "The name of the workspace to which the log data will be added.");

  // File extensions handed to the Filename property.
  std::vector<std::string> exts;
  exts.push_back(".txt");
  exts.push_back(".log");
  declareProperty(new FileProperty("Filename", "", FileProperty::Load, exts),
                  "The filename (including its full or relative path) of a SNS "
                  "text log file (not cvinfo), "
                  "an ISIS log file, or an ISIS raw file. "
                  "If a raw file is specified all log files associated with "
                  "that raw file are loaded into the specified workspace. The "
                  "file extension must "
                  "either be .raw or .s when specifying a raw file");

  // Per-column log names (SNS-style files).
  declareProperty(
      new ArrayProperty<std::string>("Names"),
      "For SNS-style log files only: the names of each column's log, separated "
      "by commas. "
      "This must be one fewer than the number of columns in the file.");

  // Per-column log units (SNS-style files); may be left empty.
  declareProperty(
      new ArrayProperty<std::string>("Units"),
      "For SNS-style log files only: the units of each column's log, separated "
      "by commas. "
      "This must be one fewer than the number of columns in the file. "
      "Optional: leave blank for no units in any log.");

  // Column count; defaults to the EMPTY_INT sentinel, in which case exec()
  // works the count out from the file.
  declareProperty("NumberOfColumns", Mantid::EMPTY_INT(),
                  "Number of columns in the file. If not set Mantid will "
                  "attempt to guess.");
}
/**
 * Executes the algorithm: loads the file named by the Filename property into
 * the run object of the workspace given by the Workspace property.
 *
 * A file that isAscii() accepts is first offered to LoadSNSText(); if that
 * reports success nothing else is done. Otherwise the file is read as a two-
 * or three-column log, the column count coming from the NumberOfColumns
 * property or, when that is unset, from countNumberColumns().
 *
 * @throw Mantid::Kernel::Exception::FileError Thrown if Filename is a
 * directory
 * @throw std::invalid_argument Thrown if more than one log name is supplied
 * for a non-SNS file, or if the column count is neither 2 nor 3
 */
void LoadLog::exec() {
  m_filename = getPropertyValue("Filename");

  // Log names are optional; they are validated further down.
  std::vector<std::string> names = getProperty("Names");

  // A single stream is opened here and shared by the column readers below.
  std::ifstream logFileStream(m_filename.c_str());

  // The file property already checks that the path exists, so only the
  // "is it really a file" check is made here.
  if (Poco::File(m_filename).isDirectory()) {
    throw Exception::FileError("Filename is a directory:", m_filename);
  }

  // Logs are written into the run object of this workspace.
  const MatrixWorkspace_sptr localWorkspace = getProperty("Workspace");

  // SNS-style text files are handled completely by LoadSNSText().
  if (isAscii(m_filename) && LoadSNSText()) {
    return;
  }

  // Past this point the file is treated as an ISIS log, which carries at most
  // one log name.
  if (names.size() > 1) {
    throw std::invalid_argument(
        "More than one log name provided. Invalid ISIS log file.");
  }

  int colNum = static_cast<int>(getProperty("NumberOfColumns"));
  if (colNum == Mantid::EMPTY_INT()) {
    colNum = countNumberColumns(logFileStream, m_filename);
  }

  if (colNum == 2) {
    loadTwoColumnLogFile(logFileStream, extractLogName(names),
                         localWorkspace->mutableRun());
  } else if (colNum == 3) {
    loadThreeColumnLogFile(logFileStream, m_filename,
                           localWorkspace->mutableRun());
  } else {
    throw std::invalid_argument("The log file provided is invalid as it has "
                                "less than 2 or more than three columns.");
  }
}
/**
* Load an ISIS log file into the local workspace.
* @param logFileStream :: The stream of the log file (data).
* @param logFileName :: The name of the log file to load.
* @param run :: The run information object
*/
void LoadLog::loadTwoColumnLogFile(std::ifstream &logFileStream,
std::string logFileName, API::Run &run) {
if (!logFileStream) {
throw std::invalid_argument("Unable to open file " + m_filename);
}
// figure out if second column is a number or a string
std::string aLine;
if (Mantid::Kernel::Strings::extractToEOL(logFileStream, aLine)) {
if (!isDateTimeString(aLine)) {
throw std::invalid_argument("File" + m_filename +
" is not a standard ISIS log file. Expected "
"to be a two column file.");
}
std::string DateAndTime;
std::stringstream ins(aLine);
ins >> DateAndTime;
// read in what follows the date-time string in the log file and figure out
// what type it is
std::string whatType;
ins >> whatType;
kind l_kind = classify(whatType);
if (LoadLog::string != l_kind && LoadLog::number != l_kind) {
throw std::invalid_argument(
"ISIS log file contains unrecognised second column entries: " +
m_filename);
}
try {
Property *log =
LogParser::createLogProperty(m_filename, stringToLower(logFileName));
if (log) {
run.addLogData(log);
}
} catch (std::exception &) {
}
}
}
/**
* reads the .log stream and creates timeseries property and sets that to the
* run object
* @param logFileStream :: The stream of the log file (data).
* @param logFileName :: The name of the log file to load.
* @param run :: The run information object
*/
void LoadLog::loadThreeColumnLogFile(std::ifstream &logFileStream,
std::string logFileName, API::Run &run) {
std::string str;
std::string propname;
Mantid::Kernel::TimeSeriesProperty<double> *logd = 0;
Mantid::Kernel::TimeSeriesProperty<std::string> *logs = 0;
std::map<std::string, Kernel::TimeSeriesProperty<double> *> dMap;
std::map<std::string, Kernel::TimeSeriesProperty<std::string> *> sMap;
typedef std::pair<std::string, Kernel::TimeSeriesProperty<double> *> dpair;
typedef std::pair<std::string, Kernel::TimeSeriesProperty<std::string> *>
spair;
kind l_kind(LoadLog::empty);
bool isNumeric(false);
if (!logFileStream) {
throw std::invalid_argument("Unable to open file " + m_filename);
}
while (Mantid::Kernel::Strings::extractToEOL(logFileStream, str)) {
if (!isDateTimeString(str)) {
throw std::invalid_argument("File" + logFileName +
" is not a standard ISIS log file. Expected "
"to be a file starting with DateTime String "
"format.");
}
if (!Kernel::TimeSeriesProperty<double>::isTimeString(str) ||
(str[0] ==
'#')) { // if the line doesn't start with a time read the next line
continue;
}
std::stringstream line(str);
std::string timecolumn;
line >> timecolumn;
std::string blockcolumn;
line >> blockcolumn;
l_kind = classify(blockcolumn);
if (LoadLog::string != l_kind) {
throw std::invalid_argument(
"ISIS log file contains unrecognised second column entries:" +
logFileName);
}
std::string valuecolumn;
line >> valuecolumn;
l_kind = classify(valuecolumn);
if (LoadLog::string != l_kind && LoadLog::number != l_kind) {
continue; // no value defined, just skip this entry
}
// column two in .log file is called block column
propname = stringToLower(blockcolumn);
// check if the data is numeric
std::istringstream istr(valuecolumn);
double dvalue;
istr >> dvalue;
isNumeric = !istr.fail();
if (isNumeric) {
std::map<std::string, Kernel::TimeSeriesProperty<double> *>::iterator
ditr = dMap.find(propname);
if (ditr != dMap.end()) {
Kernel::TimeSeriesProperty<double> *prop = ditr->second;
if (prop)
prop->addValue(timecolumn, dvalue);
} else {
logd = new Kernel::TimeSeriesProperty<double>(propname);
logd->addValue(timecolumn, dvalue);
dMap.insert(dpair(propname, logd));
}
} else {
std::map<std::string, Kernel::TimeSeriesProperty<std::string> *>::iterator
sitr = sMap.find(propname);
if (sitr != sMap.end()) {
Kernel::TimeSeriesProperty<std::string> *prop = sitr->second;
if (prop)
prop->addValue(timecolumn, valuecolumn);
} else {
logs = new Kernel::TimeSeriesProperty<std::string>(propname);
logs->addValue(timecolumn, valuecolumn);
sMap.insert(spair(propname, logs));
}
}
}
try {
std::map<std::string, Kernel::TimeSeriesProperty<double> *>::const_iterator
itr = dMap.begin();
for (; itr != dMap.end(); ++itr) {
run.addLogData(itr->second);
}
std::map<std::string,
Kernel::TimeSeriesProperty<std::string> *>::const_iterator sitr =
sMap.begin();
for (; sitr != sMap.end(); ++sitr) {
run.addLogData(sitr->second);
}
} catch (std::invalid_argument &e) {
g_log.warning() << e.what();
} catch (Exception::ExistsError &e) {
g_log.warning() << e.what();
}
}
/**
* Check if log file property name has been set. If not set, return the
* workspace + log file name (e.g. HRP37129_ICPevent). Otherwise return first
* log name.
* @param logName :: The vector containing log file names.
* @return The name of the log file.
*/
std::string LoadLog::extractLogName(const std::vector<std::string> &logName) {
if (logName.empty()) {
return (Poco::Path(Poco::Path(m_filename).getFileName()).getBaseName());
} else {
return (logName.front());
}
}
/**
* Check if the file is SNS text; load it if it is, return false otherwise.
* @return true if the file was a SNS style; false otherwise.
*/
bool LoadLog::LoadSNSText() {
// Get the SNS-specific parameter
std::vector<std::string> names = getProperty("Names");
std::vector<std::string> units = getProperty("Units");
// Get the input workspace and retrieve run from workspace.
// the log file(s) will be loaded into the run object of the workspace
const MatrixWorkspace_sptr localWorkspace = getProperty("Workspace");
// open log file
std::ifstream inLogFile(m_filename.c_str());
// Get the first line
std::string aLine;
if (!Mantid::Kernel::Strings::extractToEOL(inLogFile, aLine))
return false;
std::vector<double> cols;
bool ret = SNSTextFormatColumns(aLine, cols);
// Any error?
if (!ret || cols.size() < 2)
return false;
size_t numCols = static_cast<size_t>(cols.size() - 1);
if (names.size() != numCols)
throw std::invalid_argument("The Names parameter should have one fewer "
"entry as the number of columns in a SNS-style "
"text log file.");
if ((!units.empty()) && (units.size() != numCols))
throw std::invalid_argument("The Units parameter should have either 0 "
"entries or one fewer entry as the number of "
"columns in a SNS-style text log file.");
// Ok, create all the logs
std::vector<TimeSeriesProperty<double> *> props;
for (size_t i = 0; i < numCols; i++) {
TimeSeriesProperty<double> *p = new TimeSeriesProperty<double>(names[i]);
if (units.size() == numCols)
p->setUnits(units[i]);
props.push_back(p);
}
// Go back to start
inLogFile.seekg(0);
while (Mantid::Kernel::Strings::extractToEOL(inLogFile, aLine)) {
if (aLine.size() == 0)
break;
if (SNSTextFormatColumns(aLine, cols)) {
if (cols.size() == numCols + 1) {
DateAndTime time(cols[0], 0.0);
for (size_t i = 0; i < numCols; i++)
props[i]->addValue(time, cols[i + 1]);
} else
throw std::runtime_error("Inconsistent number of columns while reading "
"SNS-style text file.");
} else
throw std::runtime_error(
"Error while reading columns in SNS-style text file.");
}
// Now add all the full logs to the workspace
for (size_t i = 0; i < numCols; i++) {
std::string name = props[i]->name();
if (localWorkspace->mutableRun().hasProperty(name)) {
localWorkspace->mutableRun().removeLogData(name);
g_log.information() << "Log data named " << name
<< " already existed and was overwritten.\n";
}
localWorkspace->mutableRun().addLogData(props[i]);
}
return true;
}
/**
 * Takes as input a string and tries to determine what type it is.
 *
 * An empty string is "empty"; a string holding at least one ASCII letter or
 * an underscore is a "string"; anything else is a "number".
 * @param s :: The string to be classified
 * @return An enum kind which tells what type the string is
 */
LoadLog::kind LoadLog::classify(const std::string &s) const {
  if (s.empty()) {
    return LoadLog::empty;
  }
  // Any of these characters marks the entry as text.
  static const char *const letters =
      "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_";
  const bool hasLetter = s.find_first_of(letters) != std::string::npos;
  return hasLetter ? LoadLog::string : LoadLog::number;
}
/**
 * Change each element of the string to lower case
 * @param strToConvert :: The input string (taken by value and converted in
 * place)
 * @returns The string but with all characters in lower case
 */
std::string LoadLog::stringToLower(std::string strToConvert) {
  for (std::string::iterator it = strToConvert.begin();
       it != strToConvert.end(); ++it) {
    // tolower requires a value representable as unsigned char (or EOF), so a
    // plain char that may be negative has to be converted first.
    *it = static_cast<char>(tolower(static_cast<unsigned char>(*it)));
  }
  return strToConvert;
}
/**
 * Checks whether the file looks like a plain ASCII text file by inspecting
 * its first 256 bytes (fewer if the file is shorter).
 * @param filename :: The filename to inspect
 * @returns true if none of the inspected bytes has a value above 0x7F; false
 * if one does or if the file cannot be opened
 */
bool LoadLog::isAscii(const std::string &filename) {
  FILE *file = fopen(filename.c_str(), "rb");
  if (!file) {
    // fopen returns a null pointer on failure; it must not reach fread or
    // fclose. An unreadable file is reported as "not ASCII".
    return false;
  }
  char data[256];
  const size_t n = fread(data, 1, sizeof(data), file);
  fclose(file);

  // Call it a binary file if we find a non-ascii character in the bytes that
  // were read.
  for (size_t i = 0; i < n; ++i) {
    if (static_cast<unsigned char>(data[i]) > 0x7F) {
      return false;
    }
  }
  return true;
}
/**
 * Tests whether the leading part of a string (at most its first 19
 * characters, the length of yyyy-mm-ddThh:mm:ss) is accepted by
 * DateAndTime::stringIsISO8601.
 * @param str :: The string to test
 * @returns true if the strings format matched the expected date format
 */
bool LoadLog::isDateTimeString(const std::string &str) const {
  const std::string leading = str.substr(0, 19);
  return DateAndTime::stringIsISO8601(leading);
}
/**
 * Read a line of a SNS-style text file.
 *
 * The line is split at every tab or space and each resulting piece must
 * convert to a double.
 * @param str :: The line to parse
 * @param out :: a vector that is cleared and then filled with the double
 * values converted so far.
 * @return false if the format is NOT SNS style or a conversion failed.
 */
bool LoadLog::SNSTextFormatColumns(const std::string &str,
                                   std::vector<double> &out) const {
  out.clear();

  std::vector<std::string> tokens;
  boost::split(tokens, str, boost::is_any_of("\t "));

  // Stop at the first piece that is not a number.
  double val;
  for (std::vector<std::string>::const_iterator tok = tokens.begin();
       tok != tokens.end(); ++tok) {
    if (!Strings::convert<double>(*tok, val)) {
      return false;
    }
    out.push_back(val);
  }
  return true;
}
/**
 * Count the number of columns in the first line of the text file.
 *
 * Only "two" and "three" are told apart: the answer is 3 when a third
 * whitespace-separated entry is present on the first line and 2 otherwise.
 * The stream is rewound to the beginning before returning.
 * @param logFileStream :: stream to the file
 * @param logFileName :: name for the log file; used in error messages
 * @return 2 or 3
 * @throw std::invalid_argument Thrown if the stream is not open, the first
 * line does not start with a date-time string, or the second column is empty
 */
int LoadLog::countNumberColumns(std::ifstream &logFileStream,
                                const std::string &logFileName) {
  if (!logFileStream) {
    throw std::invalid_argument("Unable to open file " + m_filename);
  }

  // extract first line of file
  std::string str;
  Mantid::Kernel::Strings::extractToEOL(logFileStream, str);

  if (!isDateTimeString(str)) {
    throw std::invalid_argument("File " + logFileName +
                                " is not a standard ISIS log file. Expected to "
                                "be a file starting with DateTime String "
                                "format.");
  }

  std::stringstream line(str);
  std::string timecolumn;
  line >> timecolumn;
  std::string blockcolumn;
  line >> blockcolumn;
  kind l_kind = classify(blockcolumn);
  if (LoadLog::string != l_kind && LoadLog::number != l_kind) {
    throw std::invalid_argument(
        "ISIS log file contains unrecognised second column entries: " +
        logFileName);
  }

  std::string valuecolumn;
  line >> valuecolumn;
  l_kind = classify(valuecolumn);

  // Reset the file back to the beginning. The state is cleared first because
  // a set EOF or fail flag (e.g. when the file ends on its first line) would
  // make the seek fail and leave the stream unusable for the caller.
  logFileStream.clear();
  logFileStream.seekg(0);

  // An empty third entry means the line only had two columns.
  const bool hasThirdColumn =
      (LoadLog::string == l_kind || LoadLog::number == l_kind);
  return hasThirdColumn ? 3 : 2;
}
} // namespace DataHandling
} // namespace Mantid