// LoadLog.cpp
//----------------------------------------------------------------------
// Includes
//----------------------------------------------------------------------
#include "LoadRaw/isisraw2.h"
#include "MantidAPI/FileProperty.h"
#include "MantidDataHandling/LoadLog.h"
#include "MantidDataObjects/Workspace2D.h"
#include "MantidKernel/ArrayProperty.h"
#include "MantidKernel/Glob.h"
#include "MantidKernel/LogParser.h"
#include "MantidKernel/make_unique.h"
#include "MantidKernel/Strings.h"
#include "MantidKernel/PropertyWithValue.h"
#include "MantidKernel/TimeSeriesProperty.h"

#include <boost/regex.hpp>
#include <boost/algorithm/string.hpp>

#include <Poco/File.h>
#include <Poco/Path.h>
#include <Poco/DirectoryIterator.h>
#include <Poco/DateTimeParser.h>
#include <Poco/DateTimeFormat.h>

#include <algorithm>
#include <cctype>
#include <cstdio>
#include <fstream> // used to get ifstream
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

namespace Mantid {
namespace DataHandling {
// Register the algorithm into the algorithm factory
DECLARE_ALGORITHM(LoadLog)

using namespace Kernel;
using API::WorkspaceProperty;
using API::MatrixWorkspace;
using API::MatrixWorkspace_sptr;
using API::FileProperty;
using DataObjects::Workspace2D;
using DataObjects::Workspace2D_sptr;

/// Default constructor: does nothing beyond default-initialising the members.
LoadLog::LoadLog() = default;

/// Initialisation method.
void LoadLog::init() {
  // When used as a Child Algorithm the workspace name is not used - hence the
  // "Anonymous" to satisfy the validator
  declareProperty(
      new WorkspaceProperty<MatrixWorkspace>("Workspace", "Anonymous",
                                             Direction::InOut),
      "The name of the workspace to which the log data will be added.");

  declareProperty(
      new FileProperty("Filename", "", FileProperty::Load, {".txt", ".log"}),
      "The filename (including its full or relative path) of a SNS "
      "text log file (not cvinfo), "
      "an ISIS log file, or an ISIS raw file. "
      "If a raw file is specified all log files associated with "
      "that raw file are loaded into the specified workspace. The "
      "file extension must "
      "either be .raw or .s when specifying a raw file");

  declareProperty(
      new ArrayProperty<std::string>("Names"),
      "For SNS-style log files only: the names of each column's log, separated "
      "by commas. "
      "This must be one fewer than the number of columns in the file.");

  declareProperty(
      new ArrayProperty<std::string>("Units"),
      "For SNS-style log files only: the units of each column's log, separated "
      "by commas. "
      "This must be one fewer than the number of columns in the file. "
      "Optional: leave blank for no units in any log.");

  declareProperty("NumberOfColumns", Mantid::EMPTY_INT(),
                  "Number of columns in the file. If not set Mantid will "
                  "attempt to guess.");
}

/**
 * Executes the algorithm. Reading in ISIS log file(s)
 * @throw Mantid::Kernel::Exception::FileError  Thrown if file is not recognised
 * to be a raw datafile or log file
 * @throw std::runtime_error Thrown with Workspace problems
 */
void LoadLog::exec() {
  // Retrieve the filename from the properties and perform some initial checks
  // on the filename
  m_filename = getPropertyValue("Filename");
  // Get the log file names if provided.
  std::vector<std::string> names = getProperty("Names");
  // Open file, in order to pass it once to all functions that will load it.
  std::ifstream logFileStream(m_filename.c_str());

  // File property checks whether the given path exists, just check that is
  // actually a file
  Poco::File l_path(m_filename);
  if (l_path.isDirectory()) {
    throw Exception::FileError("Filename is a directory:", m_filename);
  }

  // Get the input workspace and retrieve run from workspace.
  // the log file(s) will be loaded into the run object of the workspace
  const MatrixWorkspace_sptr localWorkspace = getProperty("Workspace");

  if (isAscii(m_filename)) {
    // Is it a SNS style file? If so, we load it and abort.
    if (LoadSNSText()) {
      return;
    } // Otherwise we continue.
  }

  // If there's more than one log name provided, then it's an invalid ISIS file.
  if (names.size() > 1) {
    throw std::invalid_argument(
        "More than one log name provided. Invalid ISIS log file.");
  }

  int colNum = static_cast<int>(getProperty("NumberOfColumns"));

  if (colNum == Mantid::EMPTY_INT()) {
    colNum = countNumberColumns(logFileStream, m_filename);
  }

  switch (colNum) {
  case 2:
    loadTwoColumnLogFile(logFileStream, extractLogName(names),
                         localWorkspace->mutableRun());
    break;
  case 3:
    loadThreeColumnLogFile(logFileStream, m_filename,
                           localWorkspace->mutableRun());
    break;
  default:
    throw std::invalid_argument("The log file provided is invalid as it has "
                                "less than 2 or more than three columns.");
    break;
  }
}

/**
 * Load an ISIS log file into the local workspace.
 * @param logFileStream :: The stream of the log file (data).
 * @param logFileName :: The name of the log file to load.
 * @param run :: The run information object
 */
void LoadLog::loadTwoColumnLogFile(std::ifstream &logFileStream,
                                   std::string logFileName, API::Run &run) {
  if (!logFileStream) {
    throw std::invalid_argument("Unable to open file " + m_filename);
  }

  // figure out if second column is a number or a string
  std::string aLine;
  if (Mantid::Kernel::Strings::extractToEOL(logFileStream, aLine)) {
    if (!isDateTimeString(aLine)) {
      throw std::invalid_argument("File" + m_filename +
                                  " is not a standard ISIS log file. Expected "
                                  "to be a two column file.");
    std::string DateAndTime;
    std::stringstream ins(aLine);
    ins >> DateAndTime;
    // read in what follows the date-time string in the log file and figure out
    // what type it is
    std::string whatType;
    ins >> whatType;
    kind l_kind = classify(whatType);
    if (LoadLog::string != l_kind && LoadLog::number != l_kind) {
      throw std::invalid_argument(
          "ISIS log file contains unrecognised second column entries: " +
          m_filename);
    try {
      Property *log =
          LogParser::createLogProperty(m_filename, stringToLower(logFileName));
      if (log) {
        run.addLogData(log);
    } catch (std::exception &) {
  }
}

/**
 * reads the .log stream and creates timeseries property and sets that to the
 * run object
 * @param logFileStream :: The stream of the log file (data).
 * @param logFileName :: The name of the log file to load.
 * @param run :: The run information object
 */
void LoadLog::loadThreeColumnLogFile(std::ifstream &logFileStream,
                                     std::string logFileName, API::Run &run) {
  std::string str;
  std::string propname;
  std::map<std::string, std::unique_ptr<Kernel::TimeSeriesProperty<double>>>
      dMap;
  std::map<std::string,
           std::unique_ptr<Kernel::TimeSeriesProperty<std::string>>> sMap;
  kind l_kind(LoadLog::empty);
  bool isNumeric(false);

  if (!logFileStream) {
    throw std::invalid_argument("Unable to open file " + m_filename);
  }

  while (Mantid::Kernel::Strings::extractToEOL(logFileStream, str)) {
    if (!isDateTimeString(str)) {
      throw std::invalid_argument("File" + logFileName +
                                  " is not a standard ISIS log file. Expected "
                                  "to be a file starting with DateTime String "
                                  "format.");
    if (!Kernel::TimeSeriesProperty<double>::isTimeString(str) ||
        (str[0] ==
         '#')) { // if the line doesn't start with a time read the next line
      continue;
    std::stringstream line(str);
    std::string timecolumn;
    line >> timecolumn;
    std::string blockcolumn;
    line >> blockcolumn;
    l_kind = classify(blockcolumn);

    if (LoadLog::string != l_kind) {
      throw std::invalid_argument(
          "ISIS log file contains unrecognised second column entries:" +
          logFileName);
    std::string valuecolumn;
    line >> valuecolumn;
    l_kind = classify(valuecolumn);
    if (LoadLog::string != l_kind && LoadLog::number != l_kind) {
      continue; // no value defined, just skip this entry
    }
    // column two in .log file is called block column
    propname = stringToLower(blockcolumn);
    // check if the data is numeric
    std::istringstream istr(valuecolumn);
    double dvalue;
    istr >> dvalue;
    isNumeric = !istr.fail();

    if (isNumeric) {
      auto ditr = dMap.find(propname);
      if (ditr != dMap.end()) {
        auto prop = ditr->second.get();
        if (prop)
          prop->addValue(timecolumn, dvalue);
      } else {
Hahn, Steven's avatar
Hahn, Steven committed
        auto logd =
            Mantid::Kernel::make_unique<Kernel::TimeSeriesProperty<double>>(
                propname);
        logd->addValue(timecolumn, dvalue);
        dMap.emplace(std::make_pair(propname, std::move(logd)));
      auto sitr = sMap.find(propname);
      if (sitr != sMap.end()) {
        auto prop = sitr->second.get();
        if (prop)
          prop->addValue(timecolumn, valuecolumn);
      } else {
Hahn, Steven's avatar
Hahn, Steven committed
        auto logs = Mantid::Kernel::make_unique<
            Kernel::TimeSeriesProperty<std::string>>(propname);
        logs->addValue(timecolumn, valuecolumn);
        sMap.emplace(std::make_pair(propname, std::move(logs)));
    for (auto itr = dMap.begin(); itr != dMap.end(); ++itr) {
      run.addLogData(itr->second.release());
    for (auto sitr = sMap.begin(); sitr != sMap.end(); ++sitr) {
      run.addLogData(sitr->second.release());
  } catch (std::invalid_argument &e) {
    g_log.warning() << e.what();
  } catch (Exception::ExistsError &e) {
    g_log.warning() << e.what();
  }
}

/**
 * Choose the name under which a two-column log is stored.
 * If the caller supplied at least one log name the first one is used;
 * otherwise the name is the base name (file name without extension) of
 * m_filename.
 * @param logName :: The vector containing log file names.
 * @return The name of the log file.
 */
std::string LoadLog::extractLogName(const std::vector<std::string> &logName) {
  if (!logName.empty()) {
    return logName.front();
  }
  // No explicit name: strip the directory first, then the extension.
  const std::string fileNameOnly = Poco::Path(m_filename).getFileName();
  return Poco::Path(fileNameOnly).getBaseName();
}

/**
 * Check if the file is SNS text; load it if it is, return false otherwise.
 * @return true if the file was a SNS style; false otherwise.
 */
bool LoadLog::LoadSNSText() {

  // Get the SNS-specific parameter
  std::vector<std::string> names = getProperty("Names");
  std::vector<std::string> units = getProperty("Units");

  // Get the input workspace and retrieve run from workspace.
  // the log file(s) will be loaded into the run object of the workspace
  const MatrixWorkspace_sptr localWorkspace = getProperty("Workspace");

  // open log file
  std::ifstream inLogFile(m_filename.c_str());

  // Get the first line
  std::string aLine;
  if (!Mantid::Kernel::Strings::extractToEOL(inLogFile, aLine))
    return false;

  std::vector<double> cols;
  bool ret = SNSTextFormatColumns(aLine, cols);
  // Any error?
  if (!ret || cols.size() < 2)
    return false;

  size_t numCols = static_cast<size_t>(cols.size() - 1);
  if (names.size() != numCols)
    throw std::invalid_argument("The Names parameter should have one fewer "
                                "entry as the number of columns in a SNS-style "
                                "text log file.");
  if ((!units.empty()) && (units.size() != numCols))
    throw std::invalid_argument("The Units parameter should have either 0 "
                                "entries or one fewer entry as the number of "
                                "columns in a SNS-style text log file.");

  // Ok, create all the logs
  std::vector<TimeSeriesProperty<double> *> props;
  for (size_t i = 0; i < numCols; i++) {
    auto p = new TimeSeriesProperty<double>(names[i]);
    if (units.size() == numCols)
      p->setUnits(units[i]);
    props.push_back(p);
  }
  // Go back to start
  inLogFile.seekg(0);
  while (Mantid::Kernel::Strings::extractToEOL(inLogFile, aLine)) {
    if (aLine.size() == 0)
      break;

    if (SNSTextFormatColumns(aLine, cols)) {
      if (cols.size() == numCols + 1) {
        DateAndTime time(cols[0], 0.0);
        for (size_t i = 0; i < numCols; i++)
          props[i]->addValue(time, cols[i + 1]);
      } else
        throw std::runtime_error("Inconsistent number of columns while reading "
                                 "SNS-style text file.");
    } else
      throw std::runtime_error(
          "Error while reading columns in SNS-style text file.");
  }
  // Now add all the full logs to the workspace
  for (size_t i = 0; i < numCols; i++) {
    std::string name = props[i]->name();
    if (localWorkspace->mutableRun().hasProperty(name)) {
      localWorkspace->mutableRun().removeLogData(name);
      g_log.information() << "Log data named " << name
                          << " already existed and was overwritten.\n";
    localWorkspace->mutableRun().addLogData(props[i]);
  }

  return true;
}

/**
 * Decide what kind of token a string is.
 * An empty string is LoadLog::empty; a string containing at least one ASCII
 * letter or an underscore is LoadLog::string; anything else is
 * LoadLog::number.
 * @param s :: string to be classified
 * @return A enum kind which tells what type the string is
 */
LoadLog::kind LoadLog::classify(const std::string &s) const {
  if (s.empty()) {
    return LoadLog::empty;
  }

  // std::string is spelled out in full because the unqualified name "string"
  // refers to the LoadLog::string enumerator inside this class.
  static const std::string letters(
      "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_");

  const bool hasLetter = s.find_first_of(letters) != std::string::npos;
  return hasLetter ? LoadLog::string : LoadLog::number;
}

/**
 * Change each element of the string to lower case
 * @param strToConvert :: The input string (taken by value and converted in
 * place)
 * @returns The string but with all characters in lower case
 */
std::string LoadLog::stringToLower(std::string strToConvert) {
  // std::tolower requires a value representable as unsigned char (or EOF);
  // passing a negative plain char is undefined behaviour, so each character
  // is converted through unsigned char first.
  std::transform(strToConvert.begin(), strToConvert.end(), strToConvert.begin(),
                 [](unsigned char c) {
                   return static_cast<char>(std::tolower(c));
                 });
  return strToConvert;
}

/**
 * Checks whether a file looks like plain ASCII text by inspecting its first
 * 256 bytes (fewer if the file is shorter).
 * @param filename :: The filename to inspect
 * @returns true if none of the inspected bytes is above 0x7F (an empty file
 * therefore counts as ASCII); false if a non-ASCII byte is found or the file
 * cannot be opened
 */
bool LoadLog::isAscii(const std::string &filename) {
  std::FILE *file = std::fopen(filename.c_str(), "rb");
  if (!file) {
    // Nothing can be read, so the file cannot be confirmed as ASCII.
    return false;
  }

  char data[256];
  const std::size_t n = std::fread(data, 1, sizeof(data), file);
  std::fclose(file);

  /*
   * Call it a binary file if we find a non-ascii character in the
   * first 256 bytes of the file.
   */
  for (std::size_t i = 0; i < n; ++i) {
    // Compare as unsigned so bytes >= 0x80 are not seen as negative values.
    if (static_cast<unsigned char>(data[i]) > 0x7F) {
      return false;
    }
  }
  return true;
}

/**
 * Test whether a string begins with a date-time of the form
 * yyyy-mm-ddThh:mm:ss. Only the first 19 characters are examined.
 * @param str :: The string to test
 * @returns true if the strings format matched the expected date format
 */
bool LoadLog::isDateTimeString(const std::string &str) const {
  // substr clamps the length, so strings shorter than 19 characters are safe.
  const std::string leadingPart = str.substr(0, 19);
  return DateAndTime::stringIsISO8601(leadingPart);
}

/**
 * Parse one line of a SNS-style text file into doubles.
 * The line is split on every tab or space character and each resulting token
 * must convert to a double.
 * @param str :: The line to parse
 * @param out :: cleared on entry, then filled with the converted values; on
 * failure it holds the values converted before the failing token.
 * @return false if the format is NOT SNS style or a conversion failed.
 */
bool LoadLog::SNSTextFormatColumns(const std::string &str,
                                   std::vector<double> &out) const {
  out.clear();

  std::vector<std::string> tokens;
  boost::split(tokens, str, boost::is_any_of("\t "));

  // Stop at the first token that is not a number.
  for (const auto &token : tokens) {
    double parsed;
    if (!Strings::convert<double>(token, parsed)) {
      return false;
    }
    out.push_back(parsed);
  }

  // Every token converted, so the line is in SNS format.
  return true;
}

/**
 * Count the number of columns in the first line of the text file.
 * The stream is rewound to the beginning before returning, so the caller can
 * pass it straight on to a loader.
 * @param logFileStream :: stream to the file
 * @param logFileName :: name for the log file (used in error messages)
 * @return 3 if the first line has a third column that classifies as a string
 * or a number, otherwise 2
 * @throw std::invalid_argument if the stream is not readable, the first line
 * does not start with a date-time string, or the second column is neither a
 * string nor a number
 */
int LoadLog::countNumberColumns(std::ifstream &logFileStream,
                                const std::string &logFileName) {
  if (!logFileStream) {
    throw std::invalid_argument("Unable to open file " + m_filename);
  }

  // extract first line of file
  std::string str;
  Mantid::Kernel::Strings::extractToEOL(logFileStream, str);

  if (!isDateTimeString(str)) {
    throw std::invalid_argument("File" + logFileName +
                                " is not a standard ISIS log file. Expected to "
                                "be a file starting with DateTime String "
                                "format.");
  }

  std::stringstream line(str);
  std::string timecolumn;
  line >> timecolumn;

  std::string blockcolumn;
  line >> blockcolumn;
  kind l_kind = classify(blockcolumn);

  if (LoadLog::string != l_kind && LoadLog::number != l_kind) {
    throw std::invalid_argument(
        "ISIS log file contains unrecognised second column entries:" +
        logFileName);
  }

  std::string valuecolumn;
  line >> valuecolumn;
  l_kind = classify(valuecolumn);

  // reset file back to the beginning. The state flags are cleared first:
  // seekg does nothing on a stream whose failbit is set, which would leave
  // the stream unusable for the loaders.
  logFileStream.clear();
  logFileStream.seekg(0);

  if (LoadLog::string != l_kind && LoadLog::number != l_kind) {
    return 2; // looks like a two column file
  }
  return 3; // looks like a three column file
}

} // namespace DataHandling
} // namespace Mantid