// [web-page residue] Skip to content
// [web-page residue] Snippets Groups Projects
// [web-page residue] LoadLog.cpp 18.5 KiB
// [web-page residue] Newer Older
//----------------------------------------------------------------------
// Includes
//----------------------------------------------------------------------
#include "LoadRaw/isisraw2.h"
#include "MantidAPI/FileProperty.h"
#include "MantidDataHandling/LoadLog.h"
#include "MantidDataObjects/Workspace2D.h"
#include "MantidKernel/ArrayProperty.h"
#include "MantidKernel/Glob.h"
#include "MantidKernel/LogParser.h"
#include "MantidKernel/Strings.h"
#include "MantidKernel/PropertyWithValue.h"
#include "MantidKernel/TimeSeriesProperty.h"
#include <boost/regex.hpp>
#include <boost/algorithm/string.hpp>
// [web-page residue] Campbell, Stuart's avatar
// [web-page residue] Campbell, Stuart committed
#include <Poco/File.h>
#include <Poco/Path.h>
#include <Poco/DirectoryIterator.h>
#include <Poco/DateTimeParser.h>
#include <Poco/DateTimeFormat.h>
#include <fstream>  // used to get ifstream
#include <sstream>

namespace Mantid
{
  namespace DataHandling
  {
    // Register the algorithm into the algorithm factory
    DECLARE_ALGORITHM(LoadLog)
    using namespace Kernel;
    using API::WorkspaceProperty;
    using API::MatrixWorkspace;
    using API::MatrixWorkspace_sptr;
    using API::FileProperty;
    using DataObjects::Workspace2D;
    using DataObjects::Workspace2D_sptr;
    /// Default constructor; the body is intentionally empty.
    LoadLog::LoadLog() {}
    /**
     * Initialisation method. Declares the properties of the algorithm:
     * Workspace, Filename, Names, Units and NumberOfColumns.
     */
    void LoadLog::init()
    {
      // When used as a Child Algorithm the workspace name is not used - hence the "Anonymous" to satisfy the validator
      declareProperty(
        new WorkspaceProperty<MatrixWorkspace>("Workspace","Anonymous",Direction::InOut),
        "The name of the workspace to which the log data will be added.");

      // File extensions offered for the Filename property.
      // NOTE(review): the description below still talks about .raw/.s files although only
      // .txt and .log are listed here - confirm which of the two is out of date.
      std::vector<std::string> exts;
      exts.push_back(".txt");
      exts.push_back(".log");
      declareProperty(new FileProperty("Filename", "", FileProperty::Load, exts),
        "The filename (including its full or relative path) of a SNS text log file (not cvinfo), "
        "an ISIS log file, or an ISIS raw file. "
        "If a raw file is specified all log files associated with "
        "that raw file are loaded into the specified workspace. The file extension must "
        "either be .raw or .s when specifying a raw file");

      declareProperty(new ArrayProperty<std::string>("Names"),
        "For SNS-style log files only: the names of each column's log, separated by commas. "
        "This must be one fewer than the number of columns in the file.");

      declareProperty(new ArrayProperty<std::string>("Units"),
        "For SNS-style log files only: the units of each column's log, separated by commas. "
        "This must be one fewer than the number of columns in the file. "
        "Optional: leave blank for no units in any log.");

      // EMPTY_INT() marks "not set"; exec() then works the column count out from the file.
      declareProperty("NumberOfColumns", Mantid::EMPTY_INT(), "Number of columns in the file. If not set Mantid will attempt to guess.");
    }

    /**
     * Executes the algorithm. Reading in ISIS log file(s)
     * @throw Mantid::Kernel::Exception::FileError  Thrown if file is not recognised to be a raw datafile or log file
     * @throw std::runtime_error Thrown with Workspace problems
     */
    void LoadLog::exec()
      // Retrieve the filename from the properties and perform some initial checks on the filename
      m_filename = getPropertyValue("Filename");
      // Get the log file names if provided.
      std::vector<std::string> names = getProperty("Names");
      // Open file, in order to pass it once to all functions that will load it.
      std::ifstream logFileStream(m_filename.c_str());
      // File property checks whether the given path exists, just check that is actually a file
      Poco::File l_path( m_filename );
      if ( l_path.isDirectory() )
      {
        throw Exception::FileError("Filename is a directory:" , m_filename);
      }
      // Get the input workspace and retrieve run from workspace.
      // the log file(s) will be loaded into the run object of the workspace
      const MatrixWorkspace_sptr localWorkspace = getProperty("Workspace");
      if ( isAscii(m_filename) )
      {
        // Is it a SNS style file? If so, we load it and abort.
        if ( LoadSNSText() )
        {
          return;
        } // Otherwise we continue.
      }
      // If there's more than one log name provided, then it's an invalid ISIS file.
      if (names.size() > 1)
        throw std::invalid_argument("More than one log name provided. Invalid ISIS log file.");
      int colNum =  static_cast<int>(getProperty("NumberOfColumns"));
      if(colNum == Mantid::EMPTY_INT())
        colNum = countNumberColumns(logFileStream, m_filename);

      switch(colNum)
        case 2:
          loadTwoColumnLogFile(logFileStream, extractLogName(names), localWorkspace->mutableRun());
          break;
        case 3:
          loadThreeColumnLogFile(logFileStream, m_filename, localWorkspace->mutableRun());
          break;
        default:
          throw std::invalid_argument("The log file provided is invalid as it has less than 2 or more than three columns.");
          break;
    // NOTE(review): this definition appears to have lost lines. The doc comment below has no
    // closing marker, the function has no opening brace, the first throw has no visible guard,
    // and the body stops right after "if (log)". The code is left untouched here; restore the
    // missing lines from version control before relying on it - TODO confirm.
    /**
     * Load an ISIS log file into the local workspace.
     * @param logFileStream :: The stream of the log file (data).
     * @param logFileName :: The name of the log file to load.
     * @param run :: The run information object
    void LoadLog::loadTwoColumnLogFile(std::ifstream& logFileStream, std::string logFileName, API::Run& run)
        throw std::invalid_argument("Unable to open file " + m_filename);
      // figure out if second column is a number or a string
      std::string aLine;
      if( Mantid::Kernel::Strings::extractToEOL(logFileStream,aLine) )
        if ( !isDateTimeString(aLine) )
        {
          throw std::invalid_argument("File" + m_filename + " is not a standard ISIS log file. Expected to be a two column file.");
        }

        std::string DateAndTime;
        std::stringstream ins(aLine);
        ins >> DateAndTime;

        // read in what follows the date-time string in the log file and figure out what type it is
        std::string whatType;
        ins >> whatType;
        kind l_kind = classify(whatType);

        if (LoadLog::string != l_kind && LoadLog::number != l_kind)
          throw std::invalid_argument("ISIS log file contains unrecognised second column entries: " + m_filename);
        }

        try
        {
          Property* log = LogParser::createLogProperty(m_filename,stringToLower(logFileName));
          if (log)
          {
    }

    /**
     * reads the .log stream and creates timeseries property and sets that to the run object
     * @param logFileStream :: The stream of the log file (data).
     * @param logFileName :: The name of the log file to load.
     * @param run :: The run information object
     */
    void LoadLog::loadThreeColumnLogFile(std::ifstream& logFileStream, std::string logFileName, API::Run& run)
    {
      // Each accepted line is read as: time column, block (name) column, value column.
      // Values are collected into one time series per lower-cased block name, numeric and
      // string values in separate maps, and every series is handed to run.addLogData() at the end.
      std::string str;
      std::string propname;
      Mantid::Kernel::TimeSeriesProperty<double>* logd = 0;
      Mantid::Kernel::TimeSeriesProperty<std::string>* logs = 0;
      // Series created so far, keyed by lower-cased block name.
      std::map<std::string,Kernel::TimeSeriesProperty<double>*> dMap;
      std::map<std::string,Kernel::TimeSeriesProperty<std::string>*> sMap;
      typedef std::pair<std::string,Kernel::TimeSeriesProperty<double>* > dpair;
      typedef std::pair<std::string,Kernel::TimeSeriesProperty<std::string>* > spair;
      kind l_kind(LoadLog::empty);
      bool isNumeric(false);

      if (!logFileStream)
      {
        throw std::invalid_argument("Unable to open file " + m_filename);
      // NOTE(review): lines appear to be missing around here. The closing brace of the guard
      // above, the opening brace of the while loop and the condition guarding the next throw
      // are not present. countNumberColumns() guards the same message with
      // !isDateTimeString(str); presumably the same test belongs here - TODO confirm.
      while(Mantid::Kernel::Strings::extractToEOL(logFileStream,str))
          throw std::invalid_argument("File" + logFileName + " is not a standard ISIS log file. Expected to be a file starting with DateTime String format.");
        }

        if (!Kernel::TimeSeriesProperty<double>::isTimeString(str) || (str[0]=='#'))
        {    //if the line doesn't start with a time read the next line
          continue;
        }

        // First whitespace-separated token: the time stamp.
        std::stringstream line(str);
        std::string timecolumn;
        line >> timecolumn;

        // Second token: the block name; classify() must report it as a string.
        std::string blockcolumn;
        line >> blockcolumn;
        l_kind = classify(blockcolumn);

        if ( LoadLog::string != l_kind )
        {
          throw std::invalid_argument("ISIS log file contains unrecognised second column entries:" + logFileName);
        }

        // Third token: the value.
        std::string valuecolumn;
        line >> valuecolumn;
        l_kind = classify(valuecolumn);

        if ( LoadLog::string != l_kind && LoadLog::number != l_kind)
        {
          continue; //no value defined, just skip this entry
        }

        // column two in .log file is called block column
        propname = stringToLower(blockcolumn);
        //check if the data is numeric
        std::istringstream istr(valuecolumn);
        double dvalue;
        istr >> dvalue;
        isNumeric = !istr.fail();

        if (isNumeric)
        {
          // Append to the existing numeric series for this name, or start a new one.
          std::map<std::string,Kernel::TimeSeriesProperty<double>*>::iterator ditr = dMap.find(propname);
          if(ditr != dMap.end())
          {
            Kernel::TimeSeriesProperty<double>* prop = ditr->second;
            if (prop) prop->addValue(timecolumn,dvalue);
          }
          else
          {
            logd = new Kernel::TimeSeriesProperty<double>(propname);
            logd->addValue(timecolumn,dvalue);
            dMap.insert(dpair(propname,logd));
          }
        }
        else
        {
          // Same as above, for values that did not parse as a double.
          std::map<std::string,Kernel::TimeSeriesProperty<std::string>*>::iterator sitr = sMap.find(propname);
          if(sitr != sMap.end())
          {
            Kernel::TimeSeriesProperty<std::string>* prop = sitr->second;
            if (prop) prop->addValue(timecolumn,valuecolumn);
          }
          else
          {
            logs = new Kernel::TimeSeriesProperty<std::string>(propname);
            logs->addValue(timecolumn,valuecolumn);
            sMap.insert(spair(propname,logs));
          }
        }
      }
      // Hand every collected series to the run object; the two exception types caught below
      // are reported as warnings instead of being propagated.
      try
      {
        std::map<std::string,Kernel::TimeSeriesProperty<double>*>::const_iterator itr = dMap.begin();
        for(;itr != dMap.end(); ++itr)
        {
          run.addLogData(itr->second);
        }
        std::map<std::string,Kernel::TimeSeriesProperty<std::string>*>::const_iterator sitr = sMap.begin();
        for(;sitr!=sMap.end();++sitr)
        {
          run.addLogData(sitr->second);
        }
      }
      catch(std::invalid_argument &e)
      {
        g_log.warning() << e.what();
      }
      catch(Exception::ExistsError&e)
      {
        g_log.warning() << e.what();
      }
      // NOTE(review): the closing brace of this function appears to be missing - TODO confirm.
    /**
     * Check if log file property name has been set. If not set, return the
     * workspace + log file name (e.g. HRP37129_ICPevent). Otherwise return first log name.
     * @param logName :: The vector containing log file names.
     * @return The name of the log file.
     */
    std::string LoadLog::extractLogName(const std::vector<std::string> & logName)
      if(logName.empty())
        return (Poco::Path(Poco::Path(m_filename).getFileName()).getBaseName());
      {
        return (logName.front());
      }
    /**
     * Check if the file is SNS text; load it if it is, return false otherwise.
     * @return true if the file was a SNS style; false otherwise.
     */
    bool LoadLog::LoadSNSText()
      // Get the SNS-specific parameter
      std::vector<std::string> names = getProperty("Names");
      std::vector<std::string> units = getProperty("Units");

      // Get the input workspace and retrieve run from workspace.
      // the log file(s) will be loaded into the run object of the workspace
      const MatrixWorkspace_sptr localWorkspace = getProperty("Workspace");

      // open log file
      std::ifstream inLogFile(m_filename.c_str());

      // Get the first line
      std::string aLine;
      if (!Mantid::Kernel::Strings::extractToEOL(inLogFile,aLine))
        return false;

      std::vector<double> cols;
      bool ret = SNSTextFormatColumns(aLine, cols);
      // Any error?
      if (!ret || cols.size() < 2)
        return false;

      size_t numCols = static_cast<size_t>(cols.size()-1);
      if (names.size() != numCols)
        throw std::invalid_argument("The Names parameter should have one fewer entry as the number of columns in a SNS-style text log file.");
      if ((!units.empty()) && (units.size() != numCols))
        throw std::invalid_argument("The Units parameter should have either 0 entries or one fewer entry as the number of columns in a SNS-style text log file.");

      // Ok, create all the logs
      std::vector<TimeSeriesProperty<double>*> props;
      for(size_t i=0; i < numCols; i++)
        TimeSeriesProperty<double>* p = new TimeSeriesProperty<double>(names[i]);
        if (units.size() == numCols)
          p->setUnits(units[i]);
        props.push_back(p);
      // Go back to start
      inLogFile.seekg(0);
      while(Mantid::Kernel::Strings::extractToEOL(inLogFile,aLine))
      {
        if (aLine.size() == 0)
          break;
        if (SNSTextFormatColumns(aLine, cols))
        {
          if (cols.size() == numCols+1)
          {
            DateAndTime time(cols[0], 0.0);
            for(size_t i=0; i<numCols; i++)
              props[i]->addValue(time, cols[i+1]);
          }
          else
            throw std::runtime_error("Inconsistent number of columns while reading SNS-style text file.");
        }
        else
          throw std::runtime_error("Error while reading columns in SNS-style text file.");
      }
      // Now add all the full logs to the workspace
      for(size_t i=0; i < numCols; i++)
        std::string name = props[i]->name();
        if (localWorkspace->mutableRun().hasProperty(name))
        {
          localWorkspace->mutableRun().removeLogData(name);
          g_log.information() << "Log data named " << name << " already existed and was overwritten.\n";
        }
        localWorkspace->mutableRun().addLogData(props[i]);

    /**
     * Work out what kind of token a string is.
     * @param s :: The string to be classified
     * @return LoadLog::empty for an empty string, LoadLog::string if the string contains
     *         at least one ASCII letter or an underscore, LoadLog::number otherwise
     */
    LoadLog::kind LoadLog::classify(const std::string& s) const
    {
      // Nothing to look at.
      if( s.empty() )
        return LoadLog::empty;

      // A single letter or underscore anywhere in the token makes it a string.
      const std::string alphabet("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_");
      const bool hasLetter = (s.find_first_of(alphabet) != std::string::npos);
      return hasLetter ? LoadLog::string : LoadLog::number;
    }
    /**
     * Change each element of the string to lower case
     * @param strToConvert :: The input string (taken by value; the copy is modified and returned)
     * @returns The string but with all characters in lower case
     */
    std::string LoadLog::stringToLower(std::string strToConvert)
    {
      for (std::string::iterator it = strToConvert.begin(); it != strToConvert.end(); ++it)
      {
        // tolower() needs a value representable as unsigned char (or EOF). Handing it a plain
        // char that happens to be negative is undefined behaviour, so convert first.
        *it = static_cast<char>(tolower(static_cast<unsigned char>(*it)));
      }
      return strToConvert;
    }
    /**
     * Checks whether a file looks like plain ASCII text by inspecting its first 256 bytes
     * (or the whole file if it is shorter).
     * @param filename :: The filename to inspect
     * @returns true if none of the inspected bytes is above 0x7F; false if such a byte is
     *          found or if the file cannot be opened
     */
    bool LoadLog::isAscii(const std::string& filename)
    {
      FILE* file = fopen(filename.c_str(), "rb");
      if (!file)
      {
        // A NULL handle must not reach fread()/fclose(). Reporting "not ASCII" lets the
        // caller's own open check produce the error message.
        return false;
      }

      char data[256];
      const size_t n = fread(data, 1, sizeof(data), file);
      fclose(file);

      /*
       * Call it a binary file if we find a non-ascii character in the
       * first 256 bytes of the file.
       */
      for( size_t i = 0; i < n; ++i )
      {
        if( static_cast<unsigned char>(data[i]) > 0x7F )
        {
          return false;
        }
      }
      return true;
    }

    /**
     * Tests whether a string begins with a date-time of the form yyyy-mm-ddThh:mm:ss.
     * Only the first 19 characters are examined.
     * @param str :: The string to test
     * @returns true if the leading characters are accepted by DateAndTime::stringIsISO8601
     */
    bool LoadLog::isDateTimeString(const std::string& str) const
    {
      // Length of "yyyy-mm-ddThh:mm:ss".
      const std::string::size_type stampLength = 19;
      const std::string stamp = str.substr(0, stampLength);
      return DateAndTime::stringIsISO8601(stamp);
    }
    /**
     * Split one line of a SNS-style text file into its numeric columns.
     * The line is cut at every tab and space and each piece is converted to a double.
     * @param str :: The line to split
     * @param out :: cleared first, then filled with the converted values in column order;
     *               on failure it holds the values converted before the offending piece
     * @return false as soon as a piece does not convert to a double, true otherwise
     */
    bool LoadLog::SNSTextFormatColumns(const std::string& str, std::vector<double> & out) const
    {
      out.clear();

      // Cut the line at tabs and spaces.
      std::vector<std::string> tokens;
      boost::split(tokens, str, boost::is_any_of("\t "));

      // Every column must evaluate to a double
      double parsed;
      for (std::vector<std::string>::const_iterator tok = tokens.begin(); tok != tokens.end(); ++tok)
      {
        if (Strings::convert<double>(*tok, parsed))
        {
          out.push_back(parsed);
          continue;
        }
        return false;
      }

      // Nothing failed = it is that format.
      return true;
    }
    /**
     * Count the number of columns in the first line of the text file
     * @param logFileStream :: stream to the file
     * @param logFileName :: name for the log file
     */
    int LoadLog::countNumberColumns(std::ifstream& logFileStream, const std::string& logFileName)
    {
      if (!logFileStream)
      {
        throw std::invalid_argument("Unable to open file " + m_filename);
      }

      std::string str;
      kind l_kind(LoadLog::empty);

      //extract first line of file
      Mantid::Kernel::Strings::extractToEOL(logFileStream,str);

      if ( !isDateTimeString(str) )
      {
        throw std::invalid_argument("File" + logFileName + " is not a standard ISIS log file. Expected to be a file starting with DateTime String format.");
      }

      std::stringstream line(str);
      std::string timecolumn;
      line >> timecolumn;

      std::string blockcolumn;
      line >> blockcolumn;
      l_kind = classify(blockcolumn);

      if ( LoadLog::string != l_kind && LoadLog::number != l_kind )
      {
        throw std::invalid_argument("ISIS log file contains unrecognised second column entries:" + logFileName);
      }

      std::string valuecolumn;
      line >> valuecolumn;
      l_kind = classify(valuecolumn);

      //reset file back to the beginning
      logFileStream.seekg(0);

      if ( LoadLog::string != l_kind && LoadLog::number != l_kind)
      {
        return 2; //looks like a two column file
      }
      else
      {
        return 3; //looks like a three column file
      }
    }

  } // namespace DataHandling
} // namespace Mantid